From a954ac89468e10d3d1e4603e96a6c234bf729e85 Mon Sep 17 00:00:00 2001 From: yusing Date: Sun, 13 Jul 2025 14:01:36 +0800 Subject: [PATCH] feat(middleware): introduce modify_html middleware --- internal/net/gphttp/middleware/middlewares.go | 2 + internal/net/gphttp/middleware/modify_html.go | 106 ++++ .../net/gphttp/middleware/modify_html_test.go | 557 ++++++++++++++++++ 3 files changed, 665 insertions(+) create mode 100644 internal/net/gphttp/middleware/modify_html.go create mode 100644 internal/net/gphttp/middleware/modify_html_test.go diff --git a/internal/net/gphttp/middleware/middlewares.go b/internal/net/gphttp/middleware/middlewares.go index 8ae4ee9..0e98bc4 100644 --- a/internal/net/gphttp/middleware/middlewares.go +++ b/internal/net/gphttp/middleware/middlewares.go @@ -24,6 +24,8 @@ var allMiddlewares = map[string]*Middleware{ "setxforwarded": SetXForwarded, "hidexforwarded": HideXForwarded, + "modifyhtml": ModifyHTML, + "errorpage": CustomErrorPage, "customerrorpage": CustomErrorPage, diff --git a/internal/net/gphttp/middleware/modify_html.go b/internal/net/gphttp/middleware/modify_html.go new file mode 100644 index 0000000..a397949 --- /dev/null +++ b/internal/net/gphttp/middleware/modify_html.go @@ -0,0 +1,106 @@ +package middleware + +import ( + "bytes" + "io" + "net/http" + "strconv" + + "github.com/PuerkitoBio/goquery" + "github.com/rs/zerolog/log" + gphttp "github.com/yusing/go-proxy/internal/net/gphttp" + "golang.org/x/net/html" +) + +type modifyHTML struct { + Target string // css selector + HTML string // html to inject + Replace bool // replace the target element with the new html instead of appending it +} + +var ModifyHTML = NewMiddleware[modifyHTML]() + +// modifyResponse implements ResponseModifier. 
+func (m *modifyHTML) modifyResponse(resp *http.Response) error { + // including text/html and application/xhtml+xml + if !gphttp.GetContentType(resp.Header).IsHTML() { + return nil + } + + content, err := io.ReadAll(resp.Body) + if err != nil { + resp.Body.Close() + return err + } + resp.Body.Close() + + doc, err := goquery.NewDocumentFromReader(bytes.NewReader(content)) + if err != nil { + // invalid html, restore the original body + resp.Body = io.NopCloser(bytes.NewReader(content)) + log.Err(err).Str("url", fullURL(resp.Request)).Msg("invalid html found") + return nil + } + + ele := doc.Find(m.Target) + if ele.Length() == 0 { + // no target found, restore the original body + resp.Body = io.NopCloser(bytes.NewReader(content)) + return nil + } + + if m.Replace { + // replace all matching elements + ele.ReplaceWithHtml(m.HTML) + } else { + // append to the first matching element + ele.First().AppendHtml(m.HTML) + } + + h, err := buildHTML(doc) + if err != nil { + return err + } + resp.ContentLength = int64(len(h)) + resp.Header.Set("Content-Length", strconv.Itoa(len(h))) + resp.Body = io.NopCloser(bytes.NewReader(h)) + return nil +} + +// copied and modified from (*goquery.Selection).Html() +func buildHTML(s *goquery.Document) (ret []byte, err error) { + var buf bytes.Buffer + + // Merge all head nodes into one + headNodes := s.Find("head") + if headNodes.Length() > 1 { + // Get the first head node to merge everything into + firstHead := headNodes.First() + + // Merge content from all other head nodes into the first one + headNodes.Slice(1, headNodes.Length()).Each(func(i int, otherHead *goquery.Selection) { + // Move all children from other head nodes to the first head + otherHead.Children().Each(func(j int, child *goquery.Selection) { + firstHead.AppendSelection(child) + }) + }) + + // Remove the duplicate head nodes (keep only the first one) + headNodes.Slice(1, headNodes.Length()).Remove() + } + + if len(s.Nodes) > 0 { + for c := s.Nodes[0].FirstChild; c != 
nil; c = c.NextSibling { + err = html.Render(&buf, c) + if err != nil { + return + } + } + ret = buf.Bytes() + } + return +} + +func fullURL(req *http.Request) string { + return req.Host + req.RequestURI +} diff --git a/internal/net/gphttp/middleware/modify_html_test.go b/internal/net/gphttp/middleware/modify_html_test.go new file mode 100644 index 0000000..fa36fd7 --- /dev/null +++ b/internal/net/gphttp/middleware/modify_html_test.go @@ -0,0 +1,557 @@ +package middleware + +import ( + "net/http" + "strconv" + "strings" + "testing" + + expect "github.com/yusing/go-proxy/internal/utils/testing" +) + +func TestInjectCSS(t *testing.T) { + opts := OptionsRaw{ + "target": "head", + "html": "", + } + result, err := newMiddlewareTest(ModifyHTML, &testArgs{ + middlewareOpt: opts, + respHeaders: http.Header{ + "Content-Type": []string{"text/html; charset=utf-8"}, + }, + respBody: []byte(` + + + Test + + +

Test

+ + + `), + }) + expect.NoError(t, err) + expect.Equal(t, removeTabsAndNewlines(result.Data), removeTabsAndNewlines(` + + + Test + + + +

Test

+ + + `)) + contentLength, _ := strconv.Atoi(result.ResponseHeaders.Get("Content-Length")) + expect.Equal(t, contentLength, len(result.Data), "Content-Length should be updated") +} + +func TestInjectHTML_NonHTMLContent(t *testing.T) { + opts := OptionsRaw{ + "target": "head", + "html": "", + } + originalBody := []byte(`{"message": "hello world"}`) + result, err := newMiddlewareTest(ModifyHTML, &testArgs{ + middlewareOpt: opts, + respHeaders: http.Header{ + "Content-Type": []string{"application/json"}, + }, + respBody: originalBody, + }) + expect.NoError(t, err) + expect.Equal(t, result.Data, originalBody, "Non-HTML content should not be modified") +} + +func TestInjectHTML_TargetNotFound(t *testing.T) { + opts := OptionsRaw{ + "target": ".nonexistent", + "html": "
This should not appear
", + } + originalBody := []byte(` + + + Test + + +

Test

+ + + `) + result, err := newMiddlewareTest(ModifyHTML, &testArgs{ + middlewareOpt: opts, + respHeaders: http.Header{ + "Content-Type": []string{"text/html"}, + }, + respBody: originalBody, + }) + expect.NoError(t, err) + expect.Equal(t, removeTabsAndNewlines(result.Data), removeTabsAndNewlines(string(originalBody)), "Content should remain unchanged when target not found") +} + +func TestInjectHTML_MultipleTargets(t *testing.T) { + opts := OptionsRaw{ + "target": ".container", + "html": "

Injected content

", + } + result, err := newMiddlewareTest(ModifyHTML, &testArgs{ + middlewareOpt: opts, + respHeaders: http.Header{ + "Content-Type": []string{"text/html"}, + }, + respBody: []byte(` + + + +
First container
+
Second container
+ + + `), + }) + expect.NoError(t, err) + // Should only inject into the first matching element + expectedContent := ` + + + +
First container

Injected content

+
Second container
+ + + ` + expect.Equal(t, removeTabsAndNewlines(result.Data), removeTabsAndNewlines(expectedContent)) +} + +func TestInjectHTML_DifferentSelectors(t *testing.T) { + testCases := []struct { + name string + selector string + html string + original string + expected string + }{ + { + name: "ID selector", + selector: "#main", + html: "By ID", + original: `
Content
`, + expected: `
ContentBy ID
`, + }, + { + name: "Class selector", + selector: ".highlight", + html: "By class", + original: `
Content
`, + expected: `
ContentBy class
`, + }, + { + name: "Element selector", + selector: "body", + html: "", + original: `Content`, + expected: `Content`, + }, + { + name: "Attribute selector", + selector: "[data-test='target']", + html: "By attribute", + original: `
Content
`, + expected: `
ContentBy attribute
`, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + opts := OptionsRaw{ + "target": tc.selector, + "html": tc.html, + } + + result, err := newMiddlewareTest(ModifyHTML, &testArgs{ + middlewareOpt: opts, + respHeaders: http.Header{ + "Content-Type": []string{"text/html"}, + }, + respBody: []byte(`` + tc.original + ``), + }) + expect.NoError(t, err) + expect.Equal(t, removeTabsAndNewlines(result.Data), removeTabsAndNewlines(tc.expected)) + }) + } +} + +func TestInjectHTML_EmptyInjection(t *testing.T) { + opts := OptionsRaw{ + "target": "head", + "html": "", + } + originalBody := []byte(`Test`) + result, err := newMiddlewareTest(ModifyHTML, &testArgs{ + middlewareOpt: opts, + respHeaders: http.Header{ + "Content-Type": []string{"text/html"}, + }, + respBody: originalBody, + }) + expect.NoError(t, err) + expect.Equal(t, removeTabsAndNewlines(result.Data), removeTabsAndNewlines(string(originalBody)), "Empty HTML injection should not change content") +} + +func TestInjectHTML_ComplexHTML(t *testing.T) { + opts := OptionsRaw{ + "target": "body", + "html": ``, + } + result, err := newMiddlewareTest(ModifyHTML, &testArgs{ + middlewareOpt: opts, + respHeaders: http.Header{ + "Content-Type": []string{"text/html; charset=utf-8"}, + }, + respBody: []byte(` + + + + + Complex Page + + +
+

Welcome

+

Some content here.

+
+ + + `), + }) + expect.NoError(t, err) + + resultStr := removeTabsAndNewlines(result.Data) + expect.Equal(t, resultStr, removeTabsAndNewlines(` + + + + + Complex Page + + +
+

Welcome

+

Some content here.

+
+ + + + `)) + contentLength, _ := strconv.Atoi(result.ResponseHeaders.Get("Content-Length")) + expect.Equal(t, contentLength, len(result.Data), "Content-Length should be updated correctly") +} + +func TestInjectHTML_MalformedHTML(t *testing.T) { + opts := OptionsRaw{ + "target": "body", + "html": "
Valid injection
", + } + result, err := newMiddlewareTest(ModifyHTML, &testArgs{ + middlewareOpt: opts, + respHeaders: http.Header{ + "Content-Type": []string{"text/html"}, + }, + respBody: []byte(`
Unclosed div

Some content`), + }) + expect.NoError(t, err) + // Should handle malformed HTML gracefully + expect.True(t, strings.Contains(string(result.Data), "Valid injection"), "Should inject content even with malformed HTML") +} + +func TestInjectHTML_ContentTypes(t *testing.T) { + testCases := []struct { + name string + contentType string + shouldModify bool + }{ + {"HTML with charset", "text/html; charset=utf-8", true}, + {"Plain HTML", "text/html", true}, + {"XHTML", "application/xhtml+xml", true}, + {"JSON", "application/json", false}, + {"Plain text", "text/plain", false}, + {"JavaScript", "application/javascript", false}, + {"CSS", "text/css", false}, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + opts := OptionsRaw{ + "target": "body", + "html": "

Test injection
", + } + originalBody := []byte(`Original content`) + result, err := newMiddlewareTest(ModifyHTML, &testArgs{ + middlewareOpt: opts, + respHeaders: http.Header{ + "Content-Type": []string{tc.contentType}, + }, + respBody: originalBody, + }) + expect.NoError(t, err) + + if tc.shouldModify { + expect.True(t, strings.Contains(string(result.Data), "Test injection"), + "Should modify HTML content for content-type: %s", tc.contentType) + } else { + expect.Equal(t, string(result.Data), string(originalBody), + "Should not modify non-HTML content for content-type: %s", tc.contentType) + } + }) + } +} + +func TestInjectHTML_ReplaceTrue(t *testing.T) { + opts := OptionsRaw{ + "target": "body", + "html": "
Replacement content
", + "replace": true, + } + result, err := newMiddlewareTest(ModifyHTML, &testArgs{ + middlewareOpt: opts, + respHeaders: http.Header{ + "Content-Type": []string{"text/html"}, + }, + respBody: []byte(` + + + Test + + +

Original content

+

More original content

+ + + `), + }) + expect.NoError(t, err) + expect.Equal(t, removeTabsAndNewlines(result.Data), removeTabsAndNewlines(` + + + Test + + +
Replacement content
+ + + `)) + contentLength, _ := strconv.Atoi(result.ResponseHeaders.Get("Content-Length")) + expect.Equal(t, contentLength, len(result.Data), "Content-Length should be updated") +} + +func TestInjectHTML_ReplaceVsAppend(t *testing.T) { + originalBody := []byte(`
Original content
`) + + // Test append behavior (default) + appendOpts := OptionsRaw{ + "target": ".target", + "html": "Added content", + } + appendResult, err := newMiddlewareTest(ModifyHTML, &testArgs{ + middlewareOpt: appendOpts, + respHeaders: http.Header{ + "Content-Type": []string{"text/html"}, + }, + respBody: originalBody, + }) + expect.NoError(t, err) + expect.Equal(t, removeTabsAndNewlines(appendResult.Data), removeTabsAndNewlines(` +
Original contentAdded content
+ `)) + + // Test replace behavior + replaceOpts := OptionsRaw{ + "target": ".target", + "html": "Replacement content", + "replace": true, + } + replaceResult, err := newMiddlewareTest(ModifyHTML, &testArgs{ + middlewareOpt: replaceOpts, + respHeaders: http.Header{ + "Content-Type": []string{"text/html"}, + }, + respBody: originalBody, + }) + expect.NoError(t, err) + expect.Equal(t, removeTabsAndNewlines(replaceResult.Data), removeTabsAndNewlines(` +Replacement content + `)) +} + +func TestInjectHTML_ReplaceWithDifferentSelectors(t *testing.T) { + testCases := []struct { + name string + selector string + original string + html string + expected string + }{ + { + name: "ID selector replace", + selector: "#main", + original: `
Original content
`, + html: "Replaced by ID", + expected: `Replaced by ID`, + }, + { + name: "Class selector replace", + selector: ".highlight", + original: `
Original content
`, + html: "Replaced by class", + expected: `Replaced by class`, + }, + { + name: "Element selector replace", + selector: "body", + original: `Original content`, + html: "
Replaced body content
", + expected: `
Replaced body content
`, + }, + { + name: "Attribute selector replace", + selector: "[data-test='target']", + original: `
Original content
`, + html: "Replaced by attribute", + expected: `Replaced by attribute`, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + opts := OptionsRaw{ + "target": tc.selector, + "html": tc.html, + "replace": true, + } + + result, err := newMiddlewareTest(ModifyHTML, &testArgs{ + middlewareOpt: opts, + respHeaders: http.Header{ + "Content-Type": []string{"text/html"}, + }, + respBody: []byte(`` + tc.original + ``), + }) + expect.NoError(t, err) + expect.Equal(t, removeTabsAndNewlines(result.Data), removeTabsAndNewlines(tc.expected)) + }) + } +} + +func TestInjectHTML_ReplaceWithEmpty(t *testing.T) { + opts := OptionsRaw{ + "target": ".content", + "html": "", + "replace": true, + } + result, err := newMiddlewareTest(ModifyHTML, &testArgs{ + middlewareOpt: opts, + respHeaders: http.Header{ + "Content-Type": []string{"text/html"}, + }, + respBody: []byte(`
Content to be cleared
`), + }) + expect.NoError(t, err) + expect.Equal(t, removeTabsAndNewlines(result.Data), removeTabsAndNewlines(` + + `)) +} + +func TestInjectHTML_ReplaceMultipleTargets(t *testing.T) { + opts := OptionsRaw{ + "target": ".container", + "html": "

Replaced content

", + "replace": true, + } + result, err := newMiddlewareTest(ModifyHTML, &testArgs{ + middlewareOpt: opts, + respHeaders: http.Header{ + "Content-Type": []string{"text/html"}, + }, + respBody: []byte(` + + + +
First container content
+
Second container content
+ + + `), + }) + expect.NoError(t, err) + // Should replace every matching element, not just the first + expectedContent := ` + + + +

Replaced content

+

Replaced content

+ + + ` + expect.Equal(t, removeTabsAndNewlines(result.Data), removeTabsAndNewlines(expectedContent)) +} + +func TestInjectHTML_ReplaceComplexHTML(t *testing.T) { + opts := OptionsRaw{ + "target": "main", + "html": `

New Section

This replaces the entire main content.

`, + "replace": true, + } + result, err := newMiddlewareTest(ModifyHTML, &testArgs{ + middlewareOpt: opts, + respHeaders: http.Header{ + "Content-Type": []string{"text/html; charset=utf-8"}, + }, + respBody: []byte(` + + + + + Complex Page + + + +
+

Original Title

+

Original content that will be replaced.

+
More original content
+
+
Footer
+ + + `), + }) + expect.NoError(t, err) + + resultStr := removeTabsAndNewlines(result.Data) + expect.Equal(t, resultStr, removeTabsAndNewlines(` + + + + + Complex Page + + + +

New Section

This replaces the entire main content.

+ + + + `)) + contentLength, _ := strconv.Atoi(result.ResponseHeaders.Get("Content-Length")) + expect.Equal(t, contentLength, len(result.Data), "Content-Length should be updated correctly") +} + +func removeTabsAndNewlines[T string | []byte](s T) string { + replacer := strings.NewReplacer( + "\n", "", + "\r", "", + "\t", "", + ) + return replacer.Replace(string(s)) +}