Switched to go-import and go-source parsing as in Go itself.

This commit is contained in:
Stanislav Nikitin 2020-02-07 19:33:16 +05:00
parent d72da11c30
commit 392702f34d
No known key found for this signature in database
GPG Key ID: 106900B32F8192EE

View File

@ -3,6 +3,7 @@ package golang
import (
// stdlib
"bytes"
"encoding/xml"
"io"
"log"
"net/http"
@ -14,6 +15,17 @@ import (
"go.dev.pztrn.name/glp/structs"
)
// attrValue returns the attribute value for the case-insensitive key
// `name', or the empty string if nothing is found.
func attrValue(attrs []xml.Attr, name string) string {
for _, a := range attrs {
if strings.EqualFold(a.Name.Local, name) {
return a.Value
}
}
return ""
}
// Gets go-import and go-source data and fill it in dependency.
func getGoData(dependency *structs.Dependency) {
// Dependencies are imported using URL which can be called with
@ -34,95 +46,55 @@ func getGoData(dependency *structs.Dependency) {
// line-by-line for <head> parsing.
resp := bytes.NewBuffer(respBody)
var (
// This flag shows that we're currently parsing <head> from HTML.
headCurrentlyParsing bool
)
// Adopted headers parsing algo from Go itself.
// See https://github.com/golang/go/blob/95e1ea4598175a3461f40d00ce47a51e5fa6e5ea/src/cmd/go/internal/get/discovery.go
decoder := xml.NewDecoder(resp)
decoder.Strict = false
for {
line, err := resp.ReadString('\n')
if err != nil && err != io.EOF {
log.Fatalln("Failed to read HTML response line-by-line:", err.Error())
} else if err != nil && err == io.EOF {
token, err := decoder.Token()
if err != nil {
if err != io.EOF {
log.Fatalln("Failed to parse dependency's go-source and go-import things:", err.Error())
}
break
}
if headCurrentlyParsing {
// Check for go-import data.
if strings.Contains(line, `<meta name="go-import"`) {
// Get content.
// Import things are in element #4.
lineSplitted := strings.Split(line, `"`)
if e, ok := token.(xml.StartElement); ok && strings.EqualFold(e.Name.Local, "body") {
break
}
// Check line length. This is not so good approach, but
// should work for 99% of dependencies.
if len(lineSplitted) < 5 {
log.Println("Got line: '" + line + "', but it cannot be parsed. Probably badly formed - tag itself appears to be incomplete. Skipping")
continue
}
if e, ok := token.(xml.EndElement); ok && strings.EqualFold(e.Name.Local, "head") {
break
}
if len(lineSplitted) > 5 {
log.Println("Got line: '" + line + "', but it cannot be parsed. Probably badly formed - line where meta tag is located appears to be too long. Skipping")
continue
}
e, ok := token.(xml.StartElement)
if !ok || !strings.EqualFold(e.Name.Local, "meta") {
continue
}
// Import line contains data like VCS name and VCS URL.
// They're delimited with whitespace.
importDataSplitted := strings.Split(lineSplitted[3], " ")
// Check if we haven't found "go-import" or "go-source" in token's
// attributes.
if attrValue(e.Attr, "name") != "go-import" && attrValue(e.Attr, "name") != "go-source" {
continue
}
// Import line should contain at least 3 elements.
if len(importDataSplitted) < 3 {
log.Println("Got line: '" + line + "', but it cannot be parsed. Probably badly formed - import data is too small. Skipping")
continue
}
// Fill dependency data with this data.
// First element is a module name and we do not actually
// need it, because it is already filled previously.
dependency.VCS.VCS = importDataSplitted[1]
dependency.VCS.VCSPath = importDataSplitted[2]
}
// Check for go-source data.
if strings.Contains(line, `<meta name="go-source"`) {
// Get content.
// Import things are in element #4.
lineSplitted := strings.Split(line, `"`)
// Check line length. This is not so good approach, but
// should work for 99% of dependencies.
if len(lineSplitted) < 5 {
log.Println("Got line: '" + line + "', but it cannot be parsed. Probably badly formed - tag itself appears to be incomplete. Skipping")
continue
}
if len(lineSplitted) > 5 {
log.Println("Got line: '" + line + "', but it cannot be parsed. Probably badly formed - line where meta tag is located appears to be too long. Skipping")
continue
}
// Source line contains data like VCS paths templates.
// They're delimited with whitespace.
sourceDataSplitted := strings.Split(lineSplitted[3], " ")
// Source data line should contain at least 3 elements.
if len(sourceDataSplitted) < 4 {
log.Println("Got line: '" + line + "', but it cannot be parsed. Probably badly formed - source data is too small. Skipping")
continue
}
// Fill dependency data.
dependency.VCS.SourceURLDirTemplate = sourceDataSplitted[2]
dependency.VCS.SourceURLFileTemplate = sourceDataSplitted[3]
// Parse go-import data first.
if attrValue(e.Attr, "name") == "go-import" {
if f := strings.Fields(attrValue(e.Attr, "content")); len(f) == 3 {
dependency.VCS.VCS = f[1]
dependency.VCS.VCSPath = f[2]
}
}
if strings.Contains(strings.ToLower(line), "<head>") {
headCurrentlyParsing = true
}
if strings.Contains(strings.ToLower(line), "</head>") {
headCurrentlyParsing = false
// Then - go-source data.
if attrValue(e.Attr, "name") == "go-source" {
if f := strings.Fields(attrValue(e.Attr, "content")); len(f) == 4 {
dependency.VCS.SourceURLDirTemplate = f[2]
dependency.VCS.SourceURLFileTemplate = f[3]
}
}
}