// // Copyright 2024 CloudWeGo Authors // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // package x86_64 import ( `bytes` `errors` `fmt` `math` `strconv` `strings` `unicode` `github.com/cloudwego/iasm/expr` ) type ( _TokenKind int _Punctuation int ) const ( _T_end _TokenKind = iota + 1 _T_int _T_name _T_punc _T_space ) const ( _P_plus _Punctuation = iota + 1 _P_minus _P_star _P_slash _P_percent _P_amp _P_bar _P_caret _P_shl _P_shr _P_tilde _P_lbrk _P_rbrk _P_dot _P_comma _P_colon _P_dollar _P_hash ) var _PUNC_NAME = map[_Punctuation]string { _P_plus : "+", _P_minus : "-", _P_star : "*", _P_slash : "/", _P_percent : "%", _P_amp : "&", _P_bar : "|", _P_caret : "^", _P_shl : "<<", _P_shr : ">>", _P_tilde : "~", _P_lbrk : "(", _P_rbrk : ")", _P_dot : ".", _P_comma : ",", _P_colon : ":", _P_dollar : "$", _P_hash : "#", } func (self _Punctuation) String() string { if v, ok := _PUNC_NAME[self]; ok { return v } else { return fmt.Sprintf("_Punctuation(%d)", self) } } type _Token struct { pos int end int u64 uint64 str string tag _TokenKind } func (self *_Token) punc() _Punctuation { return _Punctuation(self.u64) } func (self *_Token) String() string { switch self.tag { case _T_end : return "" case _T_int : return fmt.Sprintf("", self.u64) case _T_punc : return fmt.Sprintf("", _Punctuation(self.u64)) case _T_name : return fmt.Sprintf("", strconv.QuoteToASCII(self.str)) case _T_space : return "" default : return fmt.Sprintf("", self.tag, self.u64, strconv.QuoteToASCII(self.str)) } } func tokenEnd(p int, end int) _Token { return _Token { pos: p, end: end, tag: _T_end, } } func tokenInt(p int, val uint64) _Token { return _Token { pos: p, u64: val, tag: _T_int, } } func tokenName(p int, name string) _Token { return _Token { pos: p, str: name, tag: _T_name, } } func tokenPunc(p int, punc _Punctuation) _Token { return _Token { pos: p, tag: _T_punc, u64: uint64(punc), } } func tokenSpace(p int, end int) _Token { return _Token { pos: p, end: end, tag: _T_space, } } // SyntaxError represents an error in the assembly syntax. type SyntaxError struct { Pos int Row int Src []rune Reason string } // Error implements the error interface. func (self *SyntaxError) Error() string { if self.Pos < 0 { return fmt.Sprintf("%s at line %d", self.Reason, self.Row) } else { return fmt.Sprintf("%s at %d:%d", self.Reason, self.Row, self.Pos + 1) } } type _Tokenizer struct { pos int row int src []rune } func (self *_Tokenizer) ch() rune { return self.src[self.pos] } func (self *_Tokenizer) eof() bool { return self.pos >= len(self.src) } func (self *_Tokenizer) rch() (ret rune) { ret, self.pos = self.src[self.pos], self.pos + 1 return } func (self *_Tokenizer) err(pos int, msg string) *SyntaxError { return &SyntaxError { Pos : pos, Row : self.row, Src : self.src, Reason : msg, } } type _TrimState int const ( _TS_normal _TrimState = iota _TS_slcomm _TS_hscomm _TS_string _TS_escape _TS_accept _TS_nolast ) func (self *_Tokenizer) init(src string) { var i int var ch rune var st _TrimState /* set the source */ self.pos = 0 self.src = []rune(src) /* remove commends, including "//" and "##" */ loop: for i, ch = range self.src { switch { case st == _TS_normal && ch == '/' : st = _TS_slcomm case st == _TS_normal && ch == '"' : st = _TS_string case st == _TS_normal && ch == ';' : st = _TS_accept; break loop case st == _TS_normal && ch == '#' : st = _TS_hscomm case st == _TS_slcomm && ch == '/' : st = _TS_nolast; break loop case st == _TS_slcomm : st = _TS_normal case st == _TS_hscomm && ch == '#' : st = _TS_nolast; break loop case st == _TS_hscomm : st = _TS_normal case st == _TS_string && ch == '"' : st = _TS_normal case st == _TS_string && ch == '\\' : st = _TS_escape case st == _TS_escape : st = _TS_string } } /* check for errors */ switch st { case _TS_accept: self.src = self.src[:i] case _TS_nolast: self.src = self.src[:i - 1] case _TS_string: panic(self.err(i, "string is not terminated")) case _TS_escape: panic(self.err(i, "escape sequence is not terminated")) } } func (self *_Tokenizer) skip(check func(v rune) bool) { for !self.eof() && check(self.ch()) { self.pos++ } } func (self *_Tokenizer) find(pos int, check func(v rune) bool) string { self.skip(check) return string(self.src[pos:self.pos]) } func (self *_Tokenizer) chrv(p int) _Token { var err error var val uint64 /* starting and ending position */ p0 := p + 1 p1 := p0 + 1 /* find the end of the literal */ for p1 < len(self.src) && self.src[p1] != '\'' { if p1++; self.src[p1 - 1] == '\\' { p1++ } } /* empty literal */ if p1 == p0 { panic(self.err(p1, "empty character constant")) } /* check for EOF */ if p1 == len(self.src) { panic(self.err(p1, "unexpected EOF when scanning literals")) } /* parse the literal */ if val, err = literal64(string(self.src[p0:p1])); err != nil { panic(self.err(p0, "cannot parse literal: " + err.Error())) } /* skip the closing '\'' */ self.pos = p1 + 1 return tokenInt(p, val) } func (self *_Tokenizer) numv(p int) _Token { if val, err := strconv.ParseUint(self.find(p, isnumber), 0, 64); err != nil { panic(self.err(p, "invalid immediate value: " + err.Error())) } else { return tokenInt(p, val) } } func (self *_Tokenizer) defv(p int, cc rune) _Token { if isdigit(cc) { return self.numv(p) } else if isident0(cc) { return tokenName(p, self.find(p, isident)) } else { panic(self.err(p, "invalid char: " + strconv.QuoteRune(cc))) } } func (self *_Tokenizer) rep2(p int, pp _Punctuation, cc rune) _Token { if self.eof() { panic(self.err(self.pos, "unexpected EOF when scanning operators")) } else if c := self.rch(); c != cc { panic(self.err(p + 1, strconv.QuoteRune(cc) + " expected, got " + strconv.QuoteRune(c))) } else { return tokenPunc(p, pp) } } func (self *_Tokenizer) read() _Token { var p int var c rune var t _Token /* check for EOF */ if self.eof() { return tokenEnd(self.pos, self.pos) } /* skip spaces as needed */ if p = self.pos; unicode.IsSpace(self.src[p]) { self.skip(unicode.IsSpace) return tokenSpace(p, self.pos) } /* check for line comments */ if p = self.pos; p < len(self.src) - 1 && self.src[p] == '/' && self.src[p + 1] == '/' { self.pos = len(self.src) return tokenEnd(p, self.pos) } /* read the next character */ p = self.pos c = self.rch() /* parse the next character */ switch c { case '+' : t = tokenPunc(p, _P_plus) case '-' : t = tokenPunc(p, _P_minus) case '*' : t = tokenPunc(p, _P_star) case '/' : t = tokenPunc(p, _P_slash) case '%' : t = tokenPunc(p, _P_percent) case '&' : t = tokenPunc(p, _P_amp) case '|' : t = tokenPunc(p, _P_bar) case '^' : t = tokenPunc(p, _P_caret) case '<' : t = self.rep2(p, _P_shl, '<') case '>' : t = self.rep2(p, _P_shr, '>') case '~' : t = tokenPunc(p, _P_tilde) case '(' : t = tokenPunc(p, _P_lbrk) case ')' : t = tokenPunc(p, _P_rbrk) case '.' : t = tokenPunc(p, _P_dot) case ',' : t = tokenPunc(p, _P_comma) case ':' : t = tokenPunc(p, _P_colon) case '$' : t = tokenPunc(p, _P_dollar) case '#' : t = tokenPunc(p, _P_hash) case '\'' : t = self.chrv(p) default : t = self.defv(p, c) } /* mark the end of token */ t.end = self.pos return t } func (self *_Tokenizer) next() (tk _Token) { for { if tk = self.read(); tk.tag != _T_space { return } } } // LabelKind indicates the type of label reference. type LabelKind int // OperandKind indicates the type of the operand. type OperandKind int // InstructionPrefix indicates the prefix bytes prepended to the instruction. type InstructionPrefix byte const ( // OpImm means the operand is an immediate value. OpImm OperandKind = 1 << iota // OpReg means the operand is a register. OpReg // OpMem means the operand is a memory address. OpMem // OpLabel means the operand is a label, specifically for // branch instructions. OpLabel ) const ( // Declaration means the label is a declaration. Declaration LabelKind = iota + 1 // BranchTarget means the label should be treated as a branch target. BranchTarget // RelativeAddress means the label should be treated as a reference to // the code section (e.g. RIP-relative addressing). RelativeAddress ) const ( // PrefixLock causes the processor's LOCK# signal to be asserted during execution of // the accompanying instruction (turns the instruction into an atomic instruction). // In a multiprocessor environment, the LOCK# signal insures that the processor // has exclusive use of any shared memory while the signal is asserted. PrefixLock InstructionPrefix = iota // PrefixSegmentCS overrides the memory operation of this instruction to CS (Code Segment). PrefixSegmentCS // PrefixSegmentDS overrides the memory operation of this instruction to DS (Data Segment), // this is the default section for most instructions if not specified. PrefixSegmentDS // PrefixSegmentES overrides the memory operation of this instruction to ES (Extra Segment). PrefixSegmentES // PrefixSegmentFS overrides the memory operation of this instruction to FS. PrefixSegmentFS // PrefixSegmentGS overrides the memory operation of this instruction to GS. PrefixSegmentGS // PrefixSegmentSS overrides the memory operation of this instruction to SS (Stack Segment). PrefixSegmentSS ) // ParsedLabel represents a label in the source, either a jump target or // an RIP-relative addressing. type ParsedLabel struct { Name string Kind LabelKind } // ParsedOperand represents an operand of an instruction in the source. type ParsedOperand struct { Op OperandKind Imm int64 Reg Register Label ParsedLabel Memory MemoryAddress } // ParsedInstruction represents an instruction in the source. type ParsedInstruction struct { Mnemonic string Operands []ParsedOperand Prefixes []InstructionPrefix } func (self *ParsedInstruction) imm(v int64) { self.Operands = append(self.Operands, ParsedOperand { Op : OpImm, Imm : v, }) } func (self *ParsedInstruction) reg(v Register) { self.Operands = append(self.Operands, ParsedOperand { Op : OpReg, Reg : v, }) } func (self *ParsedInstruction) mem(v MemoryAddress) { self.Operands = append(self.Operands, ParsedOperand { Op : OpMem, Memory : v, }) } func (self *ParsedInstruction) target(v string) { self.Operands = append(self.Operands, ParsedOperand { Op : OpLabel, Label : ParsedLabel { Name: v, Kind: BranchTarget, }, }) } func (self *ParsedInstruction) reference(v string) { self.Operands = append(self.Operands, ParsedOperand { Op : OpLabel, Label : ParsedLabel { Name: v, Kind: RelativeAddress, }, }) } // LineKind indicates the type of ParsedLine. type LineKind int const ( // LineLabel means the ParsedLine is a label. LineLabel LineKind = iota + 1 // LineInstr means the ParsedLine is an instruction. LineInstr // LineCommand means the ParsedLine is a ParsedCommand. LineCommand ) // ParsedLine represents a parsed source line. type ParsedLine struct { Row int Src []rune Kind LineKind Label ParsedLabel Command ParsedCommand Instruction ParsedInstruction } // ParsedCommand represents a parsed assembly directive command. type ParsedCommand struct { Cmd string Args []ParsedCommandArg } // ParsedCommandArg represents an argument of a ParsedCommand. type ParsedCommandArg struct { Value string IsString bool } // Parser parses the source, and generates a sequence of ParsedInstruction's. type Parser struct { lex _Tokenizer exp expr.Parser } const ( rip Register64 = 0xff ) var _RegBranch = map[string]bool { "jmp" : true, "jmpq" : true, "call" : true, "callq" : true, } var _SegPrefix = map[string]InstructionPrefix { "cs": PrefixSegmentCS, "ds": PrefixSegmentDS, "es": PrefixSegmentES, "fs": PrefixSegmentFS, "gs": PrefixSegmentGS, "ss": PrefixSegmentSS, } func (self *Parser) i32(tk _Token, v int64) int32 { if v >= math.MinInt32 && v <= math.MaxUint32 { return int32(v) } else { panic(self.err(tk.pos, fmt.Sprintf("32-bit integer out ouf range: %d", v))) } } func (self *Parser) err(pos int, msg string) *SyntaxError { return &SyntaxError { Pos : pos, Row : self.lex.row, Src : self.lex.src, Reason : msg, } } func (self *Parser) negv() int64 { tk := self.lex.read() tt := tk.tag /* must be an integer */ if tt != _T_int { panic(self.err(tk.pos, "integer expected after '-'")) } else { return -int64(tk.u64) } } func (self *Parser) eval(p int) (r int64) { var e error var v *expr.Expr /* searching start */ n := 1 q := p + 1 /* find the end of expression */ for n > 0 && q < len(self.lex.src) { switch self.lex.src[q] { case '(' : q++; n++ case ')' : q++; n-- default : q++ } } /* check for EOF */ if n != 0 { panic(self.err(q, "unexpected EOF when parsing expressions")) } /* evaluate the expression */ if v, e = self.exp.SetSource(string(self.lex.src[p:q - 1])).Parse(nil); e != nil { panic(self.err(p, "cannot evaluate expression: " + e.Error())) } /* evaluate the expression */ if r, e = v.Evaluate(); e != nil { panic(self.err(p, "cannot evaluate expression: " + e.Error())) } /* skip the last ')' */ v.Free() self.lex.pos = q return } func (self *Parser) relx(tk _Token) { if tk.tag != _T_punc || tk.punc() != _P_lbrk { panic(self.err(tk.pos, "'(' expected for RIP-relative addressing")) } else if tk = self.lex.next(); self.regx(tk) != rip { panic(self.err(tk.pos, "RIP-relative addressing expects %rip as the base register")) } else if tk = self.lex.next(); tk.tag != _T_punc || tk.punc() != _P_rbrk { panic(self.err(tk.pos, "RIP-relative addressing does not support indexing or scaling")) } } func (self *Parser) immx(tk _Token) int64 { if tk.tag != _T_punc || tk.punc() != _P_dollar { panic(self.err(tk.pos, "'$' expected for registers")) } else if tk = self.lex.read(); tk.tag == _T_int { return int64(tk.u64) } else if tk.tag == _T_punc && tk.punc() == _P_lbrk { return self.eval(self.lex.pos) } else if tk.tag == _T_punc && tk.punc() == _P_minus { return self.negv() } else { panic(self.err(tk.pos, "immediate value expected")) } } func (self *Parser) regx(tk _Token) Register { if tk.tag != _T_punc || tk.punc() != _P_percent { panic(self.err(tk.pos, "'%' expected for registers")) } else if tk = self.lex.read(); tk.tag != _T_name { panic(self.err(tk.pos, "register name expected")) } else if tk.str == "rip" { return rip } else if reg, ok := Registers[tk.str]; ok { return reg } else { panic(self.err(tk.pos, "invalid register name: " + strconv.Quote(tk.str))) } } func (self *Parser) regv(tk _Token) Register { if reg := self.regx(tk); reg == rip { panic(self.err(tk.pos, "%rip is not accessable as a dedicated register")) } else { return reg } } func (self *Parser) disp(vv int32) MemoryAddress { switch tk := self.lex.next(); tk.tag { case _T_end : return MemoryAddress { Displacement: vv } case _T_punc : return self.relm(tk, vv) default : panic(self.err(tk.pos, "',' or '(' expected")) } } func (self *Parser) relm(tv _Token, disp int32) MemoryAddress { var tk _Token var tt _TokenKind /* check for absolute addressing */ if tv.punc() == _P_comma { self.lex.pos-- return MemoryAddress { Displacement: disp } } /* must be '(' now */ if tv.punc() != _P_lbrk { panic(self.err(tv.pos, "',' or '(' expected")) } /* read the next token */ tk = self.lex.next() tt = tk.tag /* must be a punctuation */ if tt != _T_punc { panic(self.err(tk.pos, "'%' or ',' expected")) } /* check for base */ switch tk.punc() { case _P_percent : return self.base(tk, disp) case _P_comma : return self.index(nil, disp) default : panic(self.err(tk.pos, "'%' or ',' expected")) } } func (self *Parser) base(tk _Token, disp int32) MemoryAddress { rr := self.regx(tk) nk := self.lex.next() /* check for register indirection or base-index addressing */ if !isReg64(rr) { panic(self.err(tk.pos, "not a valid base register")) } else if nk.tag != _T_punc { panic(self.err(nk.pos, "',' or ')' expected")) } else if nk.punc() == _P_comma { return self.index(rr, disp) } else if nk.punc() == _P_rbrk { return MemoryAddress { Base: rr, Displacement: disp } } else { panic(self.err(nk.pos, "',' or ')' expected")) } } func (self *Parser) index(base Register, disp int32) MemoryAddress { tk := self.lex.next() rr := self.regx(tk) nk := self.lex.next() /* check for scaled indexing */ if base == rip { panic(self.err(tk.pos, "RIP-relative addressing does not support indexing or scaling")) } else if !isIndexable(rr) { panic(self.err(tk.pos, "not a valid index register")) } else if nk.tag != _T_punc { panic(self.err(nk.pos, "',' or ')' expected")) } else if nk.punc() == _P_comma { return self.scale(base, rr, disp) } else if nk.punc() == _P_rbrk { return MemoryAddress { Base: base, Index: rr, Scale: 1, Displacement: disp } } else { panic(self.err(nk.pos, "',' or ')' expected")) } } func (self *Parser) scale(base Register, index Register, disp int32) MemoryAddress { tk := self.lex.next() tt := tk.tag tv := tk.u64 /* must be an integer */ if tt != _T_int { panic(self.err(tk.pos, "integer expected")) } /* scale can only be 1, 2, 4 or 8 */ if tv == 0 || (_Scales & (1 << tv)) == 0 { panic(self.err(tk.pos, "scale can only be 1, 2, 4 or 8")) } /* read next token */ tk = self.lex.next() tt = tk.tag /* check for the closing ')' */ if tt != _T_punc || tk.punc() != _P_rbrk { panic(self.err(tk.pos, "')' expected")) } /* construct the memory address */ return MemoryAddress { Base : base, Index : index, Scale : uint8(tv), Displacement : disp, } } func (self *Parser) cmds() *ParsedLine { cmd := "" pos := self.lex.pos buf := []ParsedCommandArg(nil) /* find the end of command */ for p := pos; pos < len(self.lex.src); pos++ { if unicode.IsSpace(self.lex.src[pos]) { cmd = string(self.lex.src[p:pos]) break } } /* parse the arguments */ loop: for { switch self.next(&pos) { case 0 : break loop case '#' : break loop case '"' : pos = self.strings(&buf, pos) default : pos = self.expressions(&buf, pos) } } /* construct the line */ return &ParsedLine { Row : self.lex.row, Src : self.lex.src, Kind : LineCommand, Command : ParsedCommand { Cmd : cmd, Args : buf, }, } } func (self *Parser) feed(line string) *ParsedLine { ff := true rr := false lk := false /* reset the lexer */ self.lex.row++ self.lex.init(line) /* parse the first token */ tk := self.lex.next() tt := tk.tag /* it is a directive if it starts with a dot */ if tk.tag == _T_punc && tk.punc() == _P_dot { return self.cmds() } /* otherwise it could be labels or instructions */ if tt != _T_name { panic(self.err(tk.pos, "identifier expected")) } /* peek the next token */ lex := self.lex tkx := lex.next() /* check for labels */ if tkx.tag == _T_punc && tkx.punc() == _P_colon { tkx = lex.next() ttx := tkx.tag /* the line must end here */ if ttx != _T_end { panic(self.err(tkx.pos, "garbage after label definition")) } /* construct the label */ return &ParsedLine { Row : self.lex.row, Src : self.lex.src, Kind : LineLabel, Label : ParsedLabel { Kind: Declaration, Name: tk.str, }, } } /* special case for the "lock" prefix */ if tk.tag == _T_name && strings.ToLower(tk.str) == "lock" { lk = true tk = self.lex.next() /* must be an instruction */ if tk.tag != _T_name { panic(self.err(tk.pos, "identifier expected")) } } /* set the line kind and mnemonic */ ret := &ParsedLine { Row : self.lex.row, Src : self.lex.src, Kind : LineInstr, Instruction : ParsedInstruction { Mnemonic: strings.ToLower(tk.str) }, } /* check for LOCK prefix */ if lk { ret.Instruction.Prefixes = append(ret.Instruction.Prefixes, PrefixLock) } /* parse all the operands */ for { tk = self.lex.next() tt = tk.tag /* check for end of line */ if tt == _T_end { break } /* expect a comma if not the first operand */ if !ff { if tt == _T_punc && tk.punc() == _P_comma { tk = self.lex.next() } else { panic(self.err(tk.pos, "',' expected")) } } /* not the first operand anymore */ ff = false tt = tk.tag /* encountered an integer, must be a SIB memory address */ if tt == _T_int { ret.Instruction.mem(self.disp(self.i32(tk, int64(tk.u64)))) continue } /* encountered an identifier, maybe an expression or a jump target, or a segment override prefix */ if tt == _T_name { ts := tk.str tp := self.lex.pos /* if the next token is EOF or a comma, it's a jumpt target */ if tk = self.lex.next(); tk.tag == _T_end || (tk.tag == _T_punc && tk.punc() == _P_comma) { self.lex.pos = tp ret.Instruction.target(ts) continue } /* if it is a colon, it's a segment override prefix, otherwise it must be an RIP-relative addressing operand */ if tk.tag != _T_punc || tk.punc() != _P_colon { self.relx(tk) ret.Instruction.reference(ts) continue } /* lookup segment prefixes */ if p, ok := _SegPrefix[strings.ToLower(ts)]; !ok { panic(self.err(tk.pos, "invalid segment name")) } else { ret.Instruction.Prefixes = append(ret.Instruction.Prefixes, p) } /* read the next token */ tk = self.lex.next() tt = tk.tag /* encountered an integer, must be a SIB memory address */ if tt == _T_int { ret.Instruction.mem(self.disp(self.i32(tk, int64(tk.u64)))) continue } } /* certain instructions may have a "*" before operands */ if tt == _T_punc && tk.punc() == _P_star { tk = self.lex.next() tt = tk.tag rr = true } /* ... otherwise it must be a punctuation */ if tt != _T_punc { panic(self.err(tk.pos, "'$', '%', '-' or '(' expected")) } /* check the operator */ switch tk.punc() { case _P_lbrk : break case _P_minus : ret.Instruction.mem(self.disp(self.i32(tk, self.negv()))) ; continue case _P_dollar : ret.Instruction.imm(self.immx(tk)) ; continue case _P_percent : ret.Instruction.reg(self.regv(tk)) ; continue default : panic(self.err(tk.pos, "'$', '%', '-' or '(' expected")) } /* special case of '(', might be either `(expr)(SIB)` or just `(SIB)` * read one more token to confirm */ tk = self.lex.next() tt = tk.tag /* the next token is '%', it's a memory address, * or ',' if it's a memory address without base, * otherwise it must be in `(expr)(SIB)` form */ if tk.tag == _T_punc && tk.punc() == _P_percent { ret.Instruction.mem(self.base(tk, 0)) } else if tk.tag == _T_punc && tk.punc() == _P_comma { ret.Instruction.mem(self.index(nil, 0)) } else { ret.Instruction.mem(self.disp(self.i32(tk, self.eval(tk.pos)))) } } /* check "jmp" and "call" instructions */ if !_RegBranch[ret.Instruction.Mnemonic] { return ret } else if len(ret.Instruction.Operands) != 1 { panic(self.err(tk.pos, fmt.Sprintf(`"%s" requires exact 1 argument`, ret.Instruction.Mnemonic))) } else if !rr && ret.Instruction.Operands[0].Op != OpReg && ret.Instruction.Operands[0].Op != OpLabel { panic(self.err(tk.pos, fmt.Sprintf(`invalid operand for "%s" instruction`, ret.Instruction.Mnemonic))) } else { return ret } } func (self *Parser) next(p *int) rune { for { if *p >= len(self.lex.src) { return 0 } else if cc := self.lex.src[*p]; !unicode.IsSpace(cc) { return cc } else { *p++ } } } func (self *Parser) delim(p int) int { if cc := self.next(&p); cc == 0 { return p } else if cc == ',' { return p + 1 } else { panic(self.err(p, "',' expected")) } } func (self *Parser) strings(argv *[]ParsedCommandArg, p int) int { var i int var e error var v string /* find the end of string */ for i = p + 1; i < len(self.lex.src) && self.lex.src[i] != '"'; i++ { if self.lex.src[i] == '\\' { i++ } } /* check for EOF */ if i == len(self.lex.src) { panic(self.err(i, "unexpected EOF when scanning strings")) } /* unquote the string */ if v, e = strconv.Unquote(string(self.lex.src[p:i + 1])); e != nil { panic(self.err(p, "invalid string: " + e.Error())) } /* add the argument to buffer */ *argv = append(*argv, ParsedCommandArg { Value: v, IsString: true }) return self.delim(i + 1) } func (self *Parser) directives(line string) { self.lex.row++ self.lex.init(line) /* parse the first token */ tk := self.lex.next() tt := tk.tag /* check for EOF */ if tt == _T_end { return } /* must be a directive */ if tt != _T_punc || tk.punc() != _P_hash { panic(self.err(tk.pos, "'#' expected")) } /* parse the line number */ tk = self.lex.next() tt = tk.tag /* must be a line number, if it is, set the row number, and ignore the rest of the line */ if tt != _T_int { panic(self.err(tk.pos, "line number expected")) } else { self.lex.row = int(tk.u64) - 1 } } func (self *Parser) expressions(argv *[]ParsedCommandArg, p int) int { var i int var n int var s int /* scan until the first standalone ',' or EOF */ loop: for i = p; i < len(self.lex.src); i++ { switch self.lex.src[i] { case ',' : if s == 0 { if n == 0 { break loop } } case ']', '}', '>' : if s == 0 { if n == 0 { break loop } else { n-- } } case '[', '{', '<' : if s == 0 { n++ } case '\\' : if s != 0 { i++ } case '\'' : if s != 2 { s ^= 1 } case '"' : if s != 1 { s ^= 2 } } } /* check for EOF in strings */ if s != 0 { panic(self.err(i, "unexpected EOF when scanning strings")) } /* check for bracket matching */ if n != 0 { panic(self.err(i, "unbalanced '{' or '[' or '<'")) } /* add the argument to buffer */ *argv = append(*argv, ParsedCommandArg { Value: string(self.lex.src[p:i]) }) return self.delim(i) } // Feed feeds the parser with one more line, and the parser // parses it into a ParsedLine. // // NOTE: Feed does not handle empty lines or multiple lines, // it panics when this happens. Use Parse to parse multiple // lines of assembly source. // func (self *Parser) Feed(src string) (ret *ParsedLine, err error) { var ok bool var ss string var vv interface{} /* check for multiple lines */ if strings.ContainsRune(src, '\n') { return nil, errors.New("passing multiple lines to Feed()") } /* check for blank lines */ if ss = strings.TrimSpace(src); ss == "" || ss[0] == '#' || strings.HasPrefix(ss, "//") { return nil, errors.New("blank line or line with only comments or line-marks") } /* setup error handler */ defer func() { if vv = recover(); vv != nil { if err, ok = vv.(*SyntaxError); !ok { panic(vv) } } }() /* call the actual parser */ ret = self.feed(src) return } // Parse parses the entire assembly source (possibly multiple lines) into // a sequence of *ParsedLine. func (self *Parser) Parse(src string) (ret []*ParsedLine, err error) { var ok bool var ss string var vv interface{} /* setup error handler */ defer func() { if vv = recover(); vv != nil { if err, ok = vv.(*SyntaxError); !ok { panic(vv) } } }() /* feed every line */ for _, line := range strings.Split(src, "\n") { if ss = strings.TrimSpace(line); ss == "" || strings.HasPrefix(ss, "//") { self.lex.row++ } else if ss[0] == '#' { self.directives(line) } else { ret = append(ret, self.feed(line)) } } /* all done */ err = nil return } // Directive handles the directive. func (self *Parser) Directive(line string) (err error) { var ok bool var ss string var vv interface{} /* check for directives */ if ss = strings.TrimSpace(line); ss == "" || ss[0] != '#' { return errors.New("not a directive") } /* setup error handler */ defer func() { if vv = recover(); vv != nil { if err, ok = vv.(*SyntaxError); !ok { panic(vv) } } }() /* call the directive parser */ self.directives(line) return } type _TermRepo struct { terms map[string]expr.Term } func (self *_TermRepo) Get(name string) (expr.Term, error) { if ret, ok := self.terms[name]; ok { return ret, nil } else { return nil, errors.New("undefined name: " + name) } } func (self *_TermRepo) label(name string) (*Label, error) { var ok bool var lb *Label var tr expr.Term /* check for existing terms */ if tr, ok = self.terms[name]; ok { if lb, ok = tr.(*Label); ok { return lb, nil } else { return nil, errors.New("name is not a label: " + name) } } /* create a new one as needed */ lb = new(Label) lb.Name = name /* create the map if needed */ if self.terms == nil { self.terms = make(map[string]expr.Term, 1) } /* register the label */ self.terms[name] = lb return lb, nil } func (self *_TermRepo) define(name string, term expr.Term) { var ok bool var tr expr.Term /* create the map if needed */ if self.terms == nil { self.terms = make(map[string]expr.Term, 1) } /* check for existing terms */ if tr, ok = self.terms[name]; !ok { self.terms[name] = term } else if _, ok = tr.(*Label); !ok { self.terms[name] = term } else { panic("conflicting term types: " + name) } } // _Command describes an assembler command. // // The _Command.args describes both the arity and argument type with characters, // the length is the number of arguments, the character itself represents the // argument type. // // Possible values are: // // s This argument should be a string // e This argument should be an expression // ? The next argument is optional, and must be the last argument. // type _Command struct { args string handler func(*Assembler, *Program, []ParsedCommandArg) error } // Options controls the behavior of Assembler. type Options struct { // InstructionAliasing specifies whether to enable instruction aliasing. // Set to true enables instruction aliasing, and the Assembler will try harder to find instructions. InstructionAliasing bool // IgnoreUnknownDirectives specifies whether to report errors when encountered unknown directives. // Set to true ignores all unknwon directives silently, useful for parsing generated assembly. IgnoreUnknownDirectives bool } // Assembler assembles the entire assembly program and generates the corresponding // machine code representations. type Assembler struct { cc int ps Parser pc uintptr buf []byte main string opts Options repo _TermRepo expr expr.Parser line *ParsedLine } var asmCommands = map[string]_Command { "org" : { "e" , (*Assembler).assembleCommandOrg }, "set" : { "ee" , (*Assembler).assembleCommandSet }, "byte" : { "e" , (*Assembler).assembleCommandByte }, "word" : { "e" , (*Assembler).assembleCommandWord }, "long" : { "e" , (*Assembler).assembleCommandLong }, "quad" : { "e" , (*Assembler).assembleCommandQuad }, "fill" : { "e?e" , (*Assembler).assembleCommandFill }, "space" : { "e?e" , (*Assembler).assembleCommandFill }, "align" : { "e?e" , (*Assembler).assembleCommandAlign }, "entry" : { "e" , (*Assembler).assembleCommandEntry }, "ascii" : { "s" , (*Assembler).assembleCommandAscii }, "asciz" : { "s" , (*Assembler).assembleCommandAsciz }, "p2align" : { "e?e" , (*Assembler).assembleCommandP2Align }, } func (self *Assembler) err(msg string) *SyntaxError { return &SyntaxError { Pos : -1, Row : self.line.Row, Src : self.line.Src, Reason : msg, } } func (self *Assembler) eval(expr string) (int64, error) { if exp, err := self.expr.SetSource(expr).Parse(nil); err != nil { return 0, err } else { return exp.Evaluate() } } func (self *Assembler) checkArgs(i int, n int, v *ParsedCommand, isString bool) error { if i >= len(v.Args) { return self.err(fmt.Sprintf("command %s takes exact %d arguments", strconv.Quote(v.Cmd), n)) } else if isString && !v.Args[i].IsString { return self.err(fmt.Sprintf("argument %d of command %s must be a string", i + 1, strconv.Quote(v.Cmd))) } else if !isString && v.Args[i].IsString { return self.err(fmt.Sprintf("argument %d of command %s must be an expression", i + 1, strconv.Quote(v.Cmd))) } else { return nil } } func (self *Assembler) assembleLabel(p *Program, lb *ParsedLabel) error { if v, err := self.repo.label(lb.Name); err != nil { return err } else { p.Link(v) return nil } } func (self *Assembler) assembleInstr(p *Program, line *ParsedInstruction) (err error) { var ok bool var pfx []byte var ops []interface{} var enc _InstructionEncoder /* convert to lower-case */ opts := self.opts name := strings.ToLower(line.Mnemonic) /* fix register-addressing branches if needed */ if opts.InstructionAliasing && len(line.Operands) == 1 { switch { case name == "retq" : name = "ret" case name == "movabsq" : name = "movq" case name == "jmp" && line.Operands[0].Op != OpLabel : name = "jmpq" case name == "jmpq" && line.Operands[0].Op == OpLabel : name = "jmp" case name == "call" && line.Operands[0].Op != OpLabel : name = "callq" case name == "callq" && line.Operands[0].Op == OpLabel : name = "call" } } /* lookup from the alias table if needed */ if opts.InstructionAliasing { enc, ok = _InstructionAliases[name] } /* lookup from the instruction table */ if !ok { enc, ok = Instructions[name] } /* remove size suffix if possible */ if !ok && opts.InstructionAliasing { switch i := len(name) - 1; name[i] { case 'b', 'w', 'l', 'q': { enc, ok = Instructions[name[:i]] } } } /* check for instruction name */ if !ok { return self.err("no such instruction: " + strconv.Quote(name)) } /* allocate memory for prefix if any */ if len(line.Prefixes) != 0 { pfx = make([]byte, len(line.Prefixes)) } /* convert the prefixes */ for i, v := range line.Prefixes { switch v { case PrefixLock : pfx[i] = _P_lock case PrefixSegmentCS : pfx[i] = _P_cs case PrefixSegmentDS : pfx[i] = _P_ds case PrefixSegmentES : pfx[i] = _P_es case PrefixSegmentFS : pfx[i] = _P_fs case PrefixSegmentGS : pfx[i] = _P_gs case PrefixSegmentSS : pfx[i] = _P_ss default : panic("unreachable: invalid segment prefix") } } /* convert the operands */ for _, op := range line.Operands { switch op.Op { case OpImm : ops = append(ops, op.Imm) case OpReg : ops = append(ops, op.Reg) case OpMem : self.assembleInstrMem(&ops, op.Memory) case OpLabel : self.assembleInstrLabel(&ops, op.Label) default : panic("parser yields an invalid operand kind") } } /* catch any exceptions in the encoder */ defer func() { if v := recover(); v != nil { err = self.err(fmt.Sprint(v)) } }() /* encode the instruction */ enc(p, ops...).prefix = pfx return nil } func (self *Assembler) assembleInstrMem(ops *[]interface{}, addr MemoryAddress) { mem := new(MemoryOperand) *ops = append(*ops, mem) /* check for RIP-relative addressing */ if addr.Base != rip { mem.Addr.Type = Memory mem.Addr.Memory = addr } else { mem.Addr.Type = Offset mem.Addr.Offset = RelativeOffset(addr.Displacement) } } func (self *Assembler) assembleInstrLabel(ops *[]interface{}, label ParsedLabel) { vk := label.Kind tr, err := self.repo.label(label.Name) /* check for errors */ if err != nil { panic(err) } /* check for branch target */ if vk == BranchTarget { *ops = append(*ops, tr) return } /* add to ops */ *ops = append(*ops, &MemoryOperand { Addr: Addressable { Type : Reference, Reference : tr, }, }) } func (self *Assembler) assembleCommand(p *Program, line *ParsedCommand) error { var iv int var cc rune var ok bool var va bool var fn _Command /* find the command */ if fn, ok = asmCommands[line.Cmd]; !ok { if self.opts.IgnoreUnknownDirectives { return nil } else { return self.err("no such command: " + strconv.Quote(line.Cmd)) } } /* expected & real argument count */ argx := len(fn.args) argc := len(line.Args) /* check the arguments */ loop: for iv, cc = range fn.args { switch cc { case '?' : va = true; break loop case 's' : if err := self.checkArgs(iv, argx, line, true) ; err != nil { return err } case 'e' : if err := self.checkArgs(iv, argx, line, false) ; err != nil { return err } default : panic("invalid argument descriptor: " + strconv.Quote(fn.args)) } } /* simple case: non-variadic command */ if !va { if argc == argx { return fn.handler(self, p, line.Args) } else { return self.err(fmt.Sprintf("command %s takes exact %d arguments", strconv.Quote(line.Cmd), argx)) } } /* check for the descriptor */ if iv != argx - 2 { panic("invalid argument descriptor: " + strconv.Quote(fn.args)) } /* variadic command and the final optional argument is set */ if argc == argx - 1 { switch fn.args[argx - 1] { case 's' : if err := self.checkArgs(iv, -1, line, true) ; err != nil { return err } case 'e' : if err := self.checkArgs(iv, -1, line, false) ; err != nil { return err } default : panic("invalid argument descriptor: " + strconv.Quote(fn.args)) } } /* check argument count */ if argc == argx - 1 || argc == argx - 2 { return fn.handler(self, p, line.Args) } else { return self.err(fmt.Sprintf("command %s takes %d or %d arguments", strconv.Quote(line.Cmd), argx - 2, argx - 1)) } } func (self *Assembler) assembleCommandInt(p *Program, argv []ParsedCommandArg, addfn func(*Program, *expr.Expr) *Instruction) error { var err error var val *expr.Expr /* parse the expression */ if val, err = self.expr.SetSource(argv[0].Value).Parse(&self.repo); err != nil { return err } /* add to the program */ addfn(p, val) return nil } func (self *Assembler) assembleCommandOrg(_ *Program, argv []ParsedCommandArg) error { var err error var val int64 /* evaluate the expression */ if val, err = self.eval(argv[0].Value); err != nil { return err } /* check for origin */ if val < 0 { return self.err(fmt.Sprintf("negative origin: %d", val)) } /* ".org" must be the first command if any */ if self.cc != 1 { return self.err(".org must be the first command if present") } /* set the initial program counter */ self.pc = uintptr(val) return nil } func (self *Assembler) assembleCommandSet(_ *Program, argv []ParsedCommandArg) error { var err error var val *expr.Expr /* parse the expression */ if val, err = self.expr.SetSource(argv[1].Value).Parse(&self.repo); err != nil { return err } /* define the new identifier */ self.repo.define(argv[0].Value, val) return nil } func (self *Assembler) assembleCommandByte(p *Program, argv []ParsedCommandArg) error { return self.assembleCommandInt(p, argv, (*Program).Byte) } func (self *Assembler) assembleCommandWord(p *Program, argv []ParsedCommandArg) error { return self.assembleCommandInt(p, argv, (*Program).Word) } func (self *Assembler) assembleCommandLong(p *Program, argv []ParsedCommandArg) error { return self.assembleCommandInt(p, argv, (*Program).Long) } func (self *Assembler) assembleCommandQuad(p *Program, argv []ParsedCommandArg) error { return self.assembleCommandInt(p, argv, (*Program).Quad) } func (self *Assembler) assembleCommandFill(p *Program, argv []ParsedCommandArg) error { var fv byte var nb int64 var ex error /* evaluate the size */ if nb, ex = self.eval(argv[0].Value); ex != nil { return ex } /* check for filling size */ if nb < 0 { return self.err(fmt.Sprintf("negative filling size: %d", nb)) } /* check for optional filling value */ if len(argv) == 2 { if val, err := self.eval(argv[1].Value); err != nil { return err } else if val < math.MinInt8 || val > math.MaxUint8 { return self.err(fmt.Sprintf("value %d cannot be represented with a byte", val)) } else { fv = byte(val) } } /* fill with specified byte */ p.Data(bytes.Repeat([]byte { fv }, int(nb))) return nil } func (self *Assembler) assembleCommandAlign(p *Program, argv []ParsedCommandArg) error { var nb int64 var ex error var fv *expr.Expr /* evaluate the size */ if nb, ex = self.eval(argv[0].Value); ex != nil { return ex } /* check for alignment value */ if nb <= 0 { return self.err(fmt.Sprintf("zero or negative alignment: %d", nb)) } /* alignment must be a power of 2 */ if (nb & (nb - 1)) != 0 { return self.err(fmt.Sprintf("alignment must be a power of 2: %d", nb)) } /* check for optional filling value */ if len(argv) == 2 { if v, err := self.expr.SetSource(argv[1].Value).Parse(&self.repo); err == nil { fv = v } else { return err } } /* fill with specified byte, default to 0 if not specified */ p.Align(uint64(nb), fv) return nil } func (self *Assembler) assembleCommandEntry(_ *Program, argv []ParsedCommandArg) error { name := argv[0].Value rbuf := []rune(name) /* check all the characters */ for i, cc := range rbuf { if !isident0(cc) && (i == 0 || !isident(cc)) { return self.err("entry point must be a label name") } } /* set the main entry point */ self.main = name return nil } func (self *Assembler) assembleCommandAscii(p *Program, argv []ParsedCommandArg) error { p.Data([]byte(argv[0].Value)) return nil } func (self *Assembler) assembleCommandAsciz(p *Program, argv []ParsedCommandArg) error { p.Data(append([]byte(argv[0].Value), 0)) return nil } func (self *Assembler) assembleCommandP2Align(p *Program, argv []ParsedCommandArg) error { var nb int64 var ex error var fv *expr.Expr /* evaluate the size */ if nb, ex = self.eval(argv[0].Value); ex != nil { return ex } /* check for alignment value */ if nb <= 0 { return self.err(fmt.Sprintf("zero or negative alignment: %d", nb)) } /* check for optional filling value */ if len(argv) == 2 { if v, err := self.expr.SetSource(argv[1].Value).Parse(&self.repo); err == nil { fv = v } else { return err } } /* fill with specified byte, default to 0 if not specified */ p.Align(1 << nb, fv) return nil } // Base returns the origin. func (self *Assembler) Base() uintptr { return self.pc } // Code returns the assembled machine code. func (self *Assembler) Code() []byte { return self.buf } // Entry returns the address of the specified entry point, or the origin if not specified. func (self *Assembler) Entry() uintptr { if self.main == "" { return self.pc } else if tr, err := self.repo.Get(self.main); err != nil { panic(err) } else if val, err := tr.Evaluate(); err != nil { panic(err) } else { return uintptr(val) } } // Options returns the internal options reference, changing it WILL affect this Assembler instance. func (self *Assembler) Options() *Options { return &self.opts } // WithBase resets the origin to pc. func (self *Assembler) WithBase(pc uintptr) *Assembler { self.pc = pc return self } // Assemble assembles the assembly source and save the machine code to internal buffer. func (self *Assembler) Assemble(src string) error { var err error var buf []*ParsedLine /* parse the source */ if buf, err = self.ps.Parse(src); err != nil { return err } /* create a new program */ p := DefaultArch.CreateProgram() defer p.Free() /* process every line */ for _, self.line = range buf { switch self.cc++; self.line.Kind { case LineLabel : if err = self.assembleLabel (p, &self.line.Label) ; err != nil { return err } case LineInstr : if err = self.assembleInstr (p, &self.line.Instruction) ; err != nil { return err } case LineCommand : if err = self.assembleCommand (p, &self.line.Command) ; err != nil { return err } default : panic("parser yields an invalid line kind") } } /* assemble the program */ self.buf = p.Assemble(self.pc) return nil }