mirror of
https://github.com/yusing/godoxy.git
synced 2025-05-19 20:32:35 +02:00
feat: implement experimental BackScanner for reading files backward and add tests for various scenarios
This commit is contained in:
parent
366fede517
commit
2f476603d3
4 changed files with 436 additions and 0 deletions
104
internal/net/gphttp/accesslog/back_scanner.go
Normal file
104
internal/net/gphttp/accesslog/back_scanner.go
Normal file
|
@ -0,0 +1,104 @@
|
|||
package accesslog
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"io"
|
||||
)
|
||||
|
||||
// BackScanner provides an interface to read a file backward line by line.
type BackScanner struct {
	file      AccessLogIO // underlying file; must support ReadAt and Seek
	chunkSize int         // number of bytes fetched per backward read step
	offset    int64       // file offset of the first byte not yet read into buffer
	buffer    []byte      // bytes read so far that have not been emitted as lines
	line      []byte      // most recent line produced by Scan
	err       error       // first error encountered; sticky across Scan calls
	size      int64       // total file size captured at construction time
}
|
||||
|
||||
// NewBackScanner creates a new Scanner to read the file backward.
|
||||
// chunkSize determines the size of each read chunk from the end of the file.
|
||||
func NewBackScanner(file AccessLogIO, chunkSize int) *BackScanner {
|
||||
size, err := file.Seek(0, io.SeekEnd)
|
||||
if err != nil {
|
||||
return &BackScanner{err: err}
|
||||
}
|
||||
return &BackScanner{
|
||||
file: file,
|
||||
chunkSize: chunkSize,
|
||||
offset: size,
|
||||
size: size,
|
||||
}
|
||||
}
|
||||
|
||||
// Scan advances the scanner to the previous line, which will then be available
// via the Bytes method. It returns false when there are no more lines.
//
// Zero-length lines (consecutive newlines) are skipped; Scan only reports
// non-empty lines or a non-empty remainder at the start of the file.
func (s *BackScanner) Scan() bool {
	// Errors are sticky: once set, all subsequent calls fail fast.
	if s.err != nil {
		return false
	}

	// Read chunks until a newline is found or the file is fully read
	for {
		// Check if there's a line in the buffer
		if idx := bytes.LastIndexByte(s.buffer, '\n'); idx >= 0 {
			// The line is everything after the last newline; keep the
			// prefix in the buffer for later Scan calls.
			s.line = s.buffer[idx+1:]
			s.buffer = s.buffer[:idx]
			if len(s.line) > 0 {
				return true
			}
			// Empty line: keep consuming the buffer.
			continue
		}

		for {
			if s.offset <= 0 {
				// No more data to read; check remaining buffer
				if len(s.buffer) > 0 {
					// First line of the file (no preceding newline).
					s.line = s.buffer
					s.buffer = nil
					return true
				}
				return false
			}

			// Read the chunk that ends at the current offset.
			// NOTE(review): assumes chunkSize > 0 — a zero value would make
			// no progress here and loop forever; confirm the constructor
			// guards this.
			newOffset := max(0, s.offset-int64(s.chunkSize))
			chunkSize := s.offset - newOffset
			chunk := make([]byte, chunkSize)

			n, err := s.file.ReadAt(chunk, newOffset)
			if err != nil && err != io.EOF {
				s.err = err
				return false
			}

			// Prepend the chunk to the buffer. The append copies the old
			// buffer into the freshly allocated chunk array, so slices
			// previously handed out via Bytes are not overwritten.
			s.buffer = append(chunk[:n], s.buffer...)
			s.offset = newOffset

			// Check for newline in the updated buffer
			if idx := bytes.LastIndexByte(s.buffer, '\n'); idx >= 0 {
				s.line = s.buffer[idx+1:]
				s.buffer = s.buffer[:idx]
				if len(s.line) > 0 {
					return true
				}
				// Empty line: return to the outer loop so the remaining
				// buffer is drained before reading more chunks.
				break
			}
		}
	}
}
|
||||
|
||||
// Bytes returns the most recent line generated by a call to Scan.
// NOTE(review): the returned slice aliases the scanner's internal buffers;
// copy it before long-term retention to be safe.
func (s *BackScanner) Bytes() []byte {
	return s.line
}
|
||||
|
||||
// FileSize returns the size of the file, as captured when the scanner was
// created (it is not re-queried if the file grows afterwards).
func (s *BackScanner) FileSize() int64 {
	return s.size
}
|
||||
|
||||
// Err returns the first non-EOF error encountered by the scanner.
// Call it after Scan returns false to distinguish end-of-file from failure.
func (s *BackScanner) Err() error {
	return s.err
}
|
127
internal/net/gphttp/accesslog/back_scanner_test.go
Normal file
127
internal/net/gphttp/accesslog/back_scanner_test.go
Normal file
|
@ -0,0 +1,127 @@
|
|||
package accesslog
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestBackScanner(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
input string
|
||||
expected []string
|
||||
}{
|
||||
{
|
||||
name: "empty file",
|
||||
input: "",
|
||||
expected: []string{},
|
||||
},
|
||||
{
|
||||
name: "single line without newline",
|
||||
input: "single line",
|
||||
expected: []string{"single line"},
|
||||
},
|
||||
{
|
||||
name: "single line with newline",
|
||||
input: "single line\n",
|
||||
expected: []string{"single line"},
|
||||
},
|
||||
{
|
||||
name: "multiple lines",
|
||||
input: "first\nsecond\nthird\n",
|
||||
expected: []string{"third", "second", "first"},
|
||||
},
|
||||
{
|
||||
name: "multiple lines without final newline",
|
||||
input: "first\nsecond\nthird",
|
||||
expected: []string{"third", "second", "first"},
|
||||
},
|
||||
{
|
||||
name: "lines longer than chunk size",
|
||||
input: "short\n" + strings.Repeat("a", 20) + "\nshort\n",
|
||||
expected: []string{"short", strings.Repeat("a", 20), "short"},
|
||||
},
|
||||
{
|
||||
name: "empty lines",
|
||||
input: "first\n\n\nlast\n",
|
||||
expected: []string{"last", "first"},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
// Setup mock file
|
||||
mockFile := &MockFile{}
|
||||
_, err := mockFile.Write([]byte(tt.input))
|
||||
if err != nil {
|
||||
t.Fatalf("failed to write to mock file: %v", err)
|
||||
}
|
||||
|
||||
// Create scanner with small chunk size to test chunking
|
||||
scanner := NewBackScanner(mockFile, 10)
|
||||
|
||||
// Collect all lines
|
||||
var lines [][]byte
|
||||
for scanner.Scan() {
|
||||
lines = append(lines, scanner.Bytes())
|
||||
}
|
||||
|
||||
// Check for scanning errors
|
||||
if err := scanner.Err(); err != nil {
|
||||
t.Errorf("scanner error: %v", err)
|
||||
}
|
||||
|
||||
// Compare results
|
||||
if len(lines) != len(tt.expected) {
|
||||
t.Errorf("got %d lines, want %d lines", len(lines), len(tt.expected))
|
||||
return
|
||||
}
|
||||
|
||||
for i, line := range lines {
|
||||
if string(line) != tt.expected[i] {
|
||||
t.Errorf("line %d: got %q, want %q", i, line, tt.expected[i])
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestBackScannerWithVaryingChunkSizes(t *testing.T) {
|
||||
input := "first\nsecond\nthird\nfourth\nfifth\n"
|
||||
expected := []string{"fifth", "fourth", "third", "second", "first"}
|
||||
chunkSizes := []int{1, 2, 3, 5, 10, 20, 100}
|
||||
|
||||
for _, chunkSize := range chunkSizes {
|
||||
t.Run(fmt.Sprintf("chunk_size_%d", chunkSize), func(t *testing.T) {
|
||||
mockFile := &MockFile{}
|
||||
_, err := mockFile.Write([]byte(input))
|
||||
if err != nil {
|
||||
t.Fatalf("failed to write to mock file: %v", err)
|
||||
}
|
||||
|
||||
scanner := NewBackScanner(mockFile, chunkSize)
|
||||
|
||||
var lines [][]byte
|
||||
for scanner.Scan() {
|
||||
lines = append(lines, scanner.Bytes())
|
||||
}
|
||||
|
||||
if err := scanner.Err(); err != nil {
|
||||
t.Errorf("scanner error: %v", err)
|
||||
}
|
||||
|
||||
if len(lines) != len(expected) {
|
||||
t.Errorf("got %d lines, want %d lines", len(lines), len(expected))
|
||||
return
|
||||
}
|
||||
|
||||
for i, line := range lines {
|
||||
if string(line) != expected[i] {
|
||||
t.Errorf("chunk size %d, line %d: got %q, want %q",
|
||||
chunkSize, i, line, expected[i])
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
119
internal/net/gphttp/accesslog/rotate.go
Normal file
119
internal/net/gphttp/accesslog/rotate.go
Normal file
|
@ -0,0 +1,119 @@
|
|||
package accesslog
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"io"
|
||||
"time"
|
||||
)
|
||||
|
||||
func (l *AccessLogger) rotate() (err error) {
|
||||
// Get retention configuration
|
||||
config := l.Config().Retention
|
||||
var shouldKeep func(t time.Time, lineCount int) bool
|
||||
|
||||
if config.Last > 0 {
|
||||
shouldKeep = func(_ time.Time, lineCount int) bool {
|
||||
return lineCount < int(config.Last)
|
||||
}
|
||||
} else if config.Days > 0 {
|
||||
cutoff := time.Now().AddDate(0, 0, -int(config.Days))
|
||||
shouldKeep = func(t time.Time, _ int) bool {
|
||||
return !t.IsZero() && !t.Before(cutoff)
|
||||
}
|
||||
} else {
|
||||
return nil // No retention policy set
|
||||
}
|
||||
|
||||
s := NewBackScanner(l.io, defaultChunkSize)
|
||||
nRead := 0
|
||||
nLines := 0
|
||||
for s.Scan() {
|
||||
nRead += len(s.Bytes()) + 1
|
||||
nLines++
|
||||
t := ParseLogTime(s.Bytes())
|
||||
if !shouldKeep(t, nLines) {
|
||||
break
|
||||
}
|
||||
}
|
||||
if s.Err() != nil {
|
||||
return s.Err()
|
||||
}
|
||||
|
||||
beg := int64(nRead)
|
||||
if _, err := l.io.Seek(-beg, io.SeekEnd); err != nil {
|
||||
return err
|
||||
}
|
||||
buf := make([]byte, nRead)
|
||||
if _, err := l.io.Read(buf); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := l.writeTruncate(buf); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (l *AccessLogger) writeTruncate(buf []byte) (err error) {
|
||||
// Seek to beginning and truncate
|
||||
if _, err := l.io.Seek(0, 0); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Write buffer back to file
|
||||
nWritten, err := l.buffered.Write(buf)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if err = l.buffered.Flush(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Truncate file
|
||||
if err = l.io.Truncate(int64(nWritten)); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// check bytes written == buffer size
|
||||
if nWritten != len(buf) {
|
||||
return io.ErrShortWrite
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// timeLen is the length of the `"time":"` key prefix; the timestamp value in
// a JSON-formatted log line begins this many bytes after the key match.
const timeLen = len(`"time":"`)

// timeJSON is the byte form of the key prefix searched for in each line.
// NOTE(review): the literal is duplicated with timeLen above — keep in sync.
var timeJSON = []byte(`"time":"`)
|
||||
|
||||
func ParseLogTime(line []byte) (t time.Time) {
|
||||
if len(line) == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
if i := bytes.Index(line, timeJSON); i != -1 { // JSON format
|
||||
var jsonStart = i + timeLen
|
||||
var jsonEnd = i + timeLen + len(LogTimeFormat)
|
||||
if len(line) < jsonEnd {
|
||||
return
|
||||
}
|
||||
timeStr := line[jsonStart:jsonEnd]
|
||||
t, _ = time.Parse(LogTimeFormat, string(timeStr))
|
||||
return
|
||||
}
|
||||
|
||||
// Common/Combined format
|
||||
// Format: <virtual host> <host ip> - - [02/Jan/2006:15:04:05 -0700] ...
|
||||
start := bytes.IndexByte(line, '[')
|
||||
if start == -1 {
|
||||
return
|
||||
}
|
||||
end := bytes.IndexByte(line[start:], ']')
|
||||
if end == -1 {
|
||||
return
|
||||
}
|
||||
end += start // adjust end position relative to full line
|
||||
|
||||
timeStr := line[start+1 : end]
|
||||
t, _ = time.Parse(LogTimeFormat, string(timeStr)) // ignore error
|
||||
return
|
||||
}
|
86
internal/net/gphttp/accesslog/rotate_test.go
Normal file
86
internal/net/gphttp/accesslog/rotate_test.go
Normal file
|
@ -0,0 +1,86 @@
|
|||
package accesslog_test
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
. "github.com/yusing/go-proxy/internal/net/gphttp/accesslog"
|
||||
"github.com/yusing/go-proxy/internal/task"
|
||||
"github.com/yusing/go-proxy/internal/utils/strutils"
|
||||
. "github.com/yusing/go-proxy/internal/utils/testing"
|
||||
)
|
||||
|
||||
func TestParseLogTime(t *testing.T) {
|
||||
tests := []string{
|
||||
`{"foo":"bar","time":"%s","bar":"baz"}`,
|
||||
`example.com 192.168.1.1 - - [%s] "GET / HTTP/1.1" 200 1234`,
|
||||
}
|
||||
testTime := time.Date(2024, 1, 2, 3, 4, 5, 0, time.UTC)
|
||||
testTimeStr := testTime.Format(LogTimeFormat)
|
||||
|
||||
for i, test := range tests {
|
||||
tests[i] = fmt.Sprintf(test, testTimeStr)
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
t.Run(test, func(t *testing.T) {
|
||||
actual := ParseLogTime([]byte(test))
|
||||
ExpectTrue(t, actual.Equal(testTime))
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestRetentionCommonFormat exercises AccessLogger.Rotate against an
// in-memory MockFile with Common-format lines, covering both retention
// modes ("last N" and "N days"). The subtests mutate shared state (the
// logger's config and the mock file) and therefore must run in order.
func TestRetentionCommonFormat(t *testing.T) {
	var file MockFile
	logger := NewAccessLogger(task.RootTask("test", false), &file, &Config{
		Format:     FormatCommon,
		BufferSize: 1024,
	})
	// Write 10 lines, then flush the buffered writer so the mock file
	// actually contains them.
	for range 10 {
		logger.Log(req, resp)
	}
	logger.Flush()
	// test.Finish(nil)

	// Sanity: no retention configured yet, 10 lines present.
	ExpectEqual(t, logger.Config().Retention, nil)
	ExpectTrue(t, file.Len() > 0)
	ExpectEqual(t, file.LineCount(), 10)

	t.Run("keep last", func(t *testing.T) {
		// "last 5": rotation keeps only the 5 most recent lines.
		logger.Config().Retention = strutils.MustParse[*Retention]("last 5")
		ExpectEqual(t, logger.Config().Retention.Days, 0)
		ExpectEqual(t, logger.Config().Retention.Last, 5)
		ExpectNoError(t, logger.Rotate())
		ExpectEqual(t, file.LineCount(), 5)
	})

	_ = file.Truncate(0)

	// Write 10 lines back-dated 10..1 days in the past by overriding the
	// formatter's clock for each line.
	timeNow := time.Now()
	for i := range 10 {
		logger.Formatter.(*CommonFormatter).GetTimeNow = func() time.Time {
			return timeNow.AddDate(0, 0, -10+i)
		}
		logger.Log(req, resp)
	}
	logger.Flush()
	ExpectEqual(t, file.LineCount(), 10)

	t.Run("keep days", func(t *testing.T) {
		// "3 days": rotation keeps the lines dated 3, 2 and 1 days ago.
		logger.Config().Retention = strutils.MustParse[*Retention]("3 days")
		ExpectEqual(t, logger.Config().Retention.Days, 3)
		ExpectEqual(t, logger.Config().Retention.Last, 0)
		ExpectNoError(t, logger.Rotate())
		ExpectEqual(t, file.LineCount(), 3)
		rotated := string(file.Content())
		// Rebuild the expected 3 newest lines and compare byte-for-byte
		// with what rotation kept.
		_ = file.Truncate(0)
		for i := range 3 {
			logger.Formatter.(*CommonFormatter).GetTimeNow = func() time.Time {
				return timeNow.AddDate(0, 0, -3+i)
			}
			logger.Log(req, resp)
		}
		// NOTE(review): no Flush() before this comparison — presumably Log
		// writes through or is flushed elsewhere; verify against MockFile.
		ExpectEqual(t, rotated, string(file.Content()))
	})
}
|
Loading…
Add table
Reference in a new issue