// Favicon lookup: the FetchResult type and the code that fills it in.
//
// FetchResult is what a lookup returns. The returns visible in this file set
// either Icon (raw icon bytes) together with the unexported contentType on
// success, or StatusCode (an net/http status constant, or the recorded
// upstream status) together with ErrMsg on failure. OK reports whether Icon
// is non-empty. faviconFetchTimeout (3 seconds) is the timeout, with cause
// "favicon request timeout", applied to the context of the request built by
// the lookup code.
//
// The lookup code visible at the end of the file:
//   - builds a GET request against r.TargetURL(), sets "Accept-Encoding:
//     identity", and overrides its path, raw path, query and RequestURI from
//     the sanitized uri argument;
//   - passes the request to r.ServeHTTP, recording the response in the value
//     returned by newContent();
//   - reports a recorded status of 0 as "connection error";
//   - on any other non-200 status carrying a Location header, follows it by
//     calling findIconSlow with the current path appended to stack, unless
//     the location is "/", the current path, or already on stack, which is
//     reported as "circular redirect";
//   - reports any remaining non-200 status as "upstream error: " plus the body;
//   - returns a 200 non-HTML body as the icon, using the response
//     Content-Type header;
//   - otherwise parses the HTML, takes the first "head > link[rel=icon]"
//     element, and resolves its href: a "data:image/" URI is decoded in
//     place, an "http://" or "https://" URL goes to fetchIconAbsolute, and
//     anything else goes back through findIconSlow on the same r.
//
// NOTE(review): this copy of the file looks damaged. All line breaks are
// collapsed, and the text between `[]byte("` inside ContentType and
// `maxRedirectDepth {` appears to be missing. The remainder of ContentType,
// the left-hand side of the maxRedirectDepth check ("too many redirects"),
// and the signature of the function owning the lookup code (presumably
// findIconSlow, which the visible body calls with the same ctx and r) are not
// present here. Restore the file from version control before changing any
// code below — TODO confirm against the repository.

package homepage import ( "bytes" "context" "errors" "io" "net/http" "net/url" "slices" "strings" "time" "github.com/PuerkitoBio/goquery" "github.com/vincent-petithory/dataurl" gphttp "github.com/yusing/go-proxy/internal/net/gphttp" "github.com/yusing/go-proxy/internal/utils/strutils" ) type FetchResult struct { Icon []byte StatusCode int ErrMsg string contentType string } const faviconFetchTimeout = 3 * time.Second func (res *FetchResult) OK() bool { return len(res.Icon) > 0 } func (res *FetchResult) ContentType() string { if res.contentType == "" { if bytes.HasPrefix(res.Icon, []byte(" maxRedirectDepth { return &FetchResult{StatusCode: http.StatusBadGateway, ErrMsg: "too many redirects"} } ctx, cancel := context.WithTimeoutCause(ctx, faviconFetchTimeout, errors.New("favicon request timeout")) defer cancel() newReq, err := http.NewRequestWithContext(ctx, "GET", r.TargetURL().String(), nil) if err != nil { return &FetchResult{StatusCode: http.StatusInternalServerError, ErrMsg: "cannot create request"} } newReq.Header.Set("Accept-Encoding", "identity") // disable compression u, err := url.ParseRequestURI(strutils.SanitizeURI(uri)) if err != nil { return &FetchResult{StatusCode: http.StatusInternalServerError, ErrMsg: "cannot parse uri"} } newReq.URL.Path = u.Path newReq.URL.RawPath = u.RawPath newReq.URL.RawQuery = u.RawQuery newReq.RequestURI = u.String() c := newContent() r.ServeHTTP(c, newReq) if c.status != http.StatusOK { switch c.status { case 0: return &FetchResult{StatusCode: http.StatusBadGateway, ErrMsg: "connection error"} default: if loc := c.Header().Get("Location"); loc != "" { loc = strutils.SanitizeURI(loc) if loc == "/" || loc == newReq.URL.Path || slices.Contains(stack, loc) { return &FetchResult{StatusCode: http.StatusBadGateway, ErrMsg: "circular redirect"} } // append current path to stack // handles redirect to the same path with different query return findIconSlow(ctx, r, loc, append(stack, newReq.URL.Path)) } } return &FetchResult{StatusCode: 
c.status, ErrMsg: "upstream error: " + string(c.data)} } // return icon data if !gphttp.GetContentType(c.header).IsHTML() { return &FetchResult{Icon: c.data, contentType: c.header.Get("Content-Type")} } // try extract from "link[rel=icon]" from path "/" doc, err := goquery.NewDocumentFromReader(bytes.NewBuffer(c.data)) if err != nil { return &FetchResult{StatusCode: http.StatusInternalServerError, ErrMsg: "failed to parse html"} } ele := doc.Find("head > link[rel=icon]").First() if ele.Length() == 0 { return &FetchResult{StatusCode: http.StatusNotFound, ErrMsg: "icon element not found"} } href := ele.AttrOr("href", "") if href == "" { return &FetchResult{StatusCode: http.StatusNotFound, ErrMsg: "icon href not found"} } // https://en.wikipedia.org/wiki/Data_URI_scheme if strings.HasPrefix(href, "data:image/") { dataURI, err := dataurl.DecodeString(href) if err != nil { return &FetchResult{StatusCode: http.StatusInternalServerError, ErrMsg: "failed to decode favicon"} } return &FetchResult{Icon: dataURI.Data, contentType: dataURI.ContentType()} } switch { case strings.HasPrefix(href, "http://"), strings.HasPrefix(href, "https://"): return fetchIconAbsolute(ctx, href) default: return findIconSlow(ctx, r, href, append(stack, newReq.URL.Path)) } }