Switched to go-import and go-source parsing as in Go itself.

This commit is contained in:
Stanislav Nikitin 2020-02-07 19:33:16 +05:00
parent d72da11c30
commit 392702f34d
No known key found for this signature in database
GPG Key ID: 106900B32F8192EE

View File

@ -3,6 +3,7 @@ package golang
import ( import (
// stdlib // stdlib
"bytes" "bytes"
"encoding/xml"
"io" "io"
"log" "log"
"net/http" "net/http"
@ -14,6 +15,17 @@ import (
"go.dev.pztrn.name/glp/structs" "go.dev.pztrn.name/glp/structs"
) )
// attrValue looks up an attribute by its local name, ignoring letter case,
// and returns the value of the first attribute that matches. When no
// attribute matches (including when attrs is empty), it returns "".
func attrValue(attrs []xml.Attr, name string) string {
	for idx := 0; idx < len(attrs); idx++ {
		// Only the local part of the name is compared; any namespace is ignored.
		if !strings.EqualFold(attrs[idx].Name.Local, name) {
			continue
		}

		return attrs[idx].Value
	}

	return ""
}
// Gets go-import and go-source data and fills it in dependency. // Gets go-import and go-source data and fills it in dependency.
func getGoData(dependency *structs.Dependency) { func getGoData(dependency *structs.Dependency) {
// Dependencies are imported using URL which can be called with // Dependencies are imported using URL which can be called with
@ -34,95 +46,55 @@ func getGoData(dependency *structs.Dependency) {
// line-by-line for <head> parsing. // line-by-line for <head> parsing.
resp := bytes.NewBuffer(respBody) resp := bytes.NewBuffer(respBody)
var ( // Adopted headers parsing algo from Go itself.
// This flag shows that we're currently parsing <head> from HTML. // See https://github.com/golang/go/blob/95e1ea4598175a3461f40d00ce47a51e5fa6e5ea/src/cmd/go/internal/get/discovery.go
headCurrentlyParsing bool
) decoder := xml.NewDecoder(resp)
decoder.Strict = false
for { for {
line, err := resp.ReadString('\n') token, err := decoder.Token()
if err != nil && err != io.EOF { if err != nil {
log.Fatalln("Failed to read HTML response line-by-line:", err.Error()) if err != io.EOF {
} else if err != nil && err == io.EOF { log.Fatalln("Failed to parse dependency's go-source and go-import things:", err.Error())
}
break break
} }
if headCurrentlyParsing { if e, ok := token.(xml.StartElement); ok && strings.EqualFold(e.Name.Local, "body") {
// Check for go-import data. break
if strings.Contains(line, `<meta name="go-import"`) { }
// Get content.
// Import things are in element #4.
lineSplitted := strings.Split(line, `"`)
// Check line length. This is not so good approach, but if e, ok := token.(xml.EndElement); ok && strings.EqualFold(e.Name.Local, "head") {
// should work for 99% of dependencies. break
if len(lineSplitted) < 5 { }
log.Println("Got line: '" + line + "', but it cannot be parsed. Probably badly formed - tag itself appears to be incomplete. Skipping")
e, ok := token.(xml.StartElement)
if !ok || !strings.EqualFold(e.Name.Local, "meta") {
continue continue
} }
if len(lineSplitted) > 5 { // Check if we haven't found "go-import" or "go-source" in token's
log.Println("Got line: '" + line + "', but it cannot be parsed. Probably badly formed - line where meta tag is located appears to be too long. Skipping") // attributes.
if attrValue(e.Attr, "name") != "go-import" && attrValue(e.Attr, "name") != "go-source" {
continue continue
} }
// Import line contains data like VCS name and VCS URL. // Parse go-import data first.
// They're delimited with whitespace. if attrValue(e.Attr, "name") == "go-import" {
importDataSplitted := strings.Split(lineSplitted[3], " ") if f := strings.Fields(attrValue(e.Attr, "content")); len(f) == 3 {
dependency.VCS.VCS = f[1]
// Import line should contain at least 3 elements. dependency.VCS.VCSPath = f[2]
if len(importDataSplitted) < 3 {
log.Println("Got line: '" + line + "', but it cannot be parsed. Probably badly formed - import data is too small. Skipping")
continue
}
// Fill dependency data with this data.
// First element is a module name and we do not actually
// need it, because it is already filled previously.
dependency.VCS.VCS = importDataSplitted[1]
dependency.VCS.VCSPath = importDataSplitted[2]
}
// Check for go-source data.
if strings.Contains(line, `<meta name="go-source"`) {
// Get content.
// Import things are in element #4.
lineSplitted := strings.Split(line, `"`)
// Check line length. This is not so good approach, but
// should work for 99% of dependencies.
if len(lineSplitted) < 5 {
log.Println("Got line: '" + line + "', but it cannot be parsed. Probably badly formed - tag itself appears to be incomplete. Skipping")
continue
}
if len(lineSplitted) > 5 {
log.Println("Got line: '" + line + "', but it cannot be parsed. Probably badly formed - line where meta tag is located appears to be too long. Skipping")
continue
}
// Source line contains data like VCS paths templates.
// They're delimited with whitespace.
sourceDataSplitted := strings.Split(lineSplitted[3], " ")
// Source data line should contain at least 3 elements.
if len(sourceDataSplitted) < 4 {
log.Println("Got line: '" + line + "', but it cannot be parsed. Probably badly formed - source data is too small. Skipping")
continue
}
// Fill dependency data.
dependency.VCS.SourceURLDirTemplate = sourceDataSplitted[2]
dependency.VCS.SourceURLFileTemplate = sourceDataSplitted[3]
} }
} }
if strings.Contains(strings.ToLower(line), "<head>") { // Then - go-source data.
headCurrentlyParsing = true if attrValue(e.Attr, "name") == "go-source" {
if f := strings.Fields(attrValue(e.Attr, "content")); len(f) == 4 {
dependency.VCS.SourceURLDirTemplate = f[2]
dependency.VCS.SourceURLFileTemplate = f[3]
} }
if strings.Contains(strings.ToLower(line), "</head>") {
headCurrentlyParsing = false
} }
} }