Switched to go-import and go-source parsing as in Go itself.
This commit is contained in:
parent
d72da11c30
commit
392702f34d
@ -3,6 +3,7 @@ package golang
|
|||||||
import (
|
import (
|
||||||
// stdlib
|
// stdlib
|
||||||
"bytes"
|
"bytes"
|
||||||
|
"encoding/xml"
|
||||||
"io"
|
"io"
|
||||||
"log"
|
"log"
|
||||||
"net/http"
|
"net/http"
|
||||||
@ -14,6 +15,17 @@ import (
|
|||||||
"go.dev.pztrn.name/glp/structs"
|
"go.dev.pztrn.name/glp/structs"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// attrValue returns the value of the first attribute in attrs whose local
// name equals name, compared case-insensitively. The attribute's namespace
// is not consulted. It returns the empty string if no attribute matches.
func attrValue(attrs []xml.Attr, name string) string {
	for _, a := range attrs {
		// HTML attribute names are case-insensitive, hence EqualFold
		// instead of a plain comparison.
		if strings.EqualFold(a.Name.Local, name) {
			return a.Value
		}
	}

	return ""
}
|
||||||
|
|
||||||
// Gets go-import and go-source data and fills it into the dependency.
|
// Gets go-import and go-source data and fills it into the dependency.
|
||||||
func getGoData(dependency *structs.Dependency) {
|
func getGoData(dependency *structs.Dependency) {
|
||||||
// Dependencies are imported using URL which can be called with
|
// Dependencies are imported using URL which can be called with
|
||||||
@ -34,95 +46,55 @@ func getGoData(dependency *structs.Dependency) {
|
|||||||
// line-by-line for <head> parsing.
|
// line-by-line for <head> parsing.
|
||||||
resp := bytes.NewBuffer(respBody)
|
resp := bytes.NewBuffer(respBody)
|
||||||
|
|
||||||
var (
|
// Adopted headers parsing algo from Go itself.
|
||||||
// This flag shows that we're currently parsing <head> from HTML.
|
// See https://github.com/golang/go/blob/95e1ea4598175a3461f40d00ce47a51e5fa6e5ea/src/cmd/go/internal/get/discovery.go
|
||||||
headCurrentlyParsing bool
|
|
||||||
)
|
decoder := xml.NewDecoder(resp)
|
||||||
|
decoder.Strict = false
|
||||||
|
|
||||||
for {
|
for {
|
||||||
line, err := resp.ReadString('\n')
|
token, err := decoder.Token()
|
||||||
if err != nil && err != io.EOF {
|
if err != nil {
|
||||||
log.Fatalln("Failed to read HTML response line-by-line:", err.Error())
|
if err != io.EOF {
|
||||||
} else if err != nil && err == io.EOF {
|
log.Fatalln("Failed to parse dependency's go-source and go-import things:", err.Error())
|
||||||
|
}
|
||||||
|
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
|
|
||||||
if headCurrentlyParsing {
|
if e, ok := token.(xml.StartElement); ok && strings.EqualFold(e.Name.Local, "body") {
|
||||||
// Check for go-import data.
|
break
|
||||||
if strings.Contains(line, `<meta name="go-import"`) {
|
}
|
||||||
// Get content.
|
|
||||||
// Import things are in element #4.
|
|
||||||
lineSplitted := strings.Split(line, `"`)
|
|
||||||
|
|
||||||
// Check line length. This is not so good approach, but
|
if e, ok := token.(xml.EndElement); ok && strings.EqualFold(e.Name.Local, "head") {
|
||||||
// should work for 99% of dependencies.
|
break
|
||||||
if len(lineSplitted) < 5 {
|
}
|
||||||
log.Println("Got line: '" + line + "', but it cannot be parsed. Probably badly formed - tag itself appears to be incomplete. Skipping")
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
if len(lineSplitted) > 5 {
|
e, ok := token.(xml.StartElement)
|
||||||
log.Println("Got line: '" + line + "', but it cannot be parsed. Probably badly formed - line where meta tag is located appears to be too long. Skipping")
|
if !ok || !strings.EqualFold(e.Name.Local, "meta") {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
// Import line contains data like VCS name and VCS URL.
|
// Check if we haven't found "go-import" or "go-source" in token's
|
||||||
// They're delimited with whitespace.
|
// attributes.
|
||||||
importDataSplitted := strings.Split(lineSplitted[3], " ")
|
if attrValue(e.Attr, "name") != "go-import" && attrValue(e.Attr, "name") != "go-source" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
// Import line should contain at least 3 elements.
|
// Parse go-import data first.
|
||||||
if len(importDataSplitted) < 3 {
|
if attrValue(e.Attr, "name") == "go-import" {
|
||||||
log.Println("Got line: '" + line + "', but it cannot be parsed. Probably badly formed - import data is too small. Skipping")
|
if f := strings.Fields(attrValue(e.Attr, "content")); len(f) == 3 {
|
||||||
continue
|
dependency.VCS.VCS = f[1]
|
||||||
}
|
dependency.VCS.VCSPath = f[2]
|
||||||
|
|
||||||
// Fill dependency data with this data.
|
|
||||||
// First element is a module name and we do not actually
|
|
||||||
// need it, because it is already filled previously.
|
|
||||||
dependency.VCS.VCS = importDataSplitted[1]
|
|
||||||
dependency.VCS.VCSPath = importDataSplitted[2]
|
|
||||||
}
|
|
||||||
|
|
||||||
// Check for go-source data.
|
|
||||||
if strings.Contains(line, `<meta name="go-source"`) {
|
|
||||||
// Get content.
|
|
||||||
// Import things are in element #4.
|
|
||||||
lineSplitted := strings.Split(line, `"`)
|
|
||||||
|
|
||||||
// Check line length. This is not so good approach, but
|
|
||||||
// should work for 99% of dependencies.
|
|
||||||
if len(lineSplitted) < 5 {
|
|
||||||
log.Println("Got line: '" + line + "', but it cannot be parsed. Probably badly formed - tag itself appears to be incomplete. Skipping")
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
if len(lineSplitted) > 5 {
|
|
||||||
log.Println("Got line: '" + line + "', but it cannot be parsed. Probably badly formed - line where meta tag is located appears to be too long. Skipping")
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
// Source line contains data like VCS paths templates.
|
|
||||||
// They're delimited with whitespace.
|
|
||||||
sourceDataSplitted := strings.Split(lineSplitted[3], " ")
|
|
||||||
|
|
||||||
// Source data line should contain at least 3 elements.
|
|
||||||
if len(sourceDataSplitted) < 4 {
|
|
||||||
log.Println("Got line: '" + line + "', but it cannot be parsed. Probably badly formed - source data is too small. Skipping")
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
// Fill dependency data.
|
|
||||||
dependency.VCS.SourceURLDirTemplate = sourceDataSplitted[2]
|
|
||||||
dependency.VCS.SourceURLFileTemplate = sourceDataSplitted[3]
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if strings.Contains(strings.ToLower(line), "<head>") {
|
// Then - go-source data.
|
||||||
headCurrentlyParsing = true
|
if attrValue(e.Attr, "name") == "go-source" {
|
||||||
}
|
if f := strings.Fields(attrValue(e.Attr, "content")); len(f) == 4 {
|
||||||
|
dependency.VCS.SourceURLDirTemplate = f[2]
|
||||||
if strings.Contains(strings.ToLower(line), "</head>") {
|
dependency.VCS.SourceURLFileTemplate = f[3]
|
||||||
headCurrentlyParsing = false
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user