feat: implement experimental BackScanner for reading files backward and add tests for various scenarios

This commit is contained in:
yusing 2025-03-28 08:12:37 +08:00
parent 366fede517
commit 2f476603d3
4 changed files with 436 additions and 0 deletions

View file

@ -0,0 +1,104 @@
package accesslog
import (
"bytes"
"io"
)
// BackScanner provides an interface to read a file backward line by line.
type BackScanner struct {
	file      AccessLogIO // log file handle providing ReadAt/Seek
	chunkSize int         // number of bytes read per backward chunk
	offset    int64       // file offset of the first byte not yet read into buffer
	buffer    []byte      // unconsumed bytes, earliest file data first
	line      []byte      // line produced by the most recent Scan
	err       error       // first non-EOF error encountered, sticky
	size      int64       // total file size captured at construction
}
// NewBackScanner creates a new Scanner to read the file backward.
// chunkSize determines the size of each read chunk from the end of the file.
// A chunkSize below 1 is clamped to 1; otherwise Scan would compute a
// zero-length read each iteration, never advance the offset, and loop forever.
func NewBackScanner(file AccessLogIO, chunkSize int) *BackScanner {
	size, err := file.Seek(0, io.SeekEnd)
	if err != nil {
		return &BackScanner{err: err}
	}
	if chunkSize < 1 {
		// Guarantee forward progress in Scan's read loop.
		chunkSize = 1
	}
	return &BackScanner{
		file:      file,
		chunkSize: chunkSize,
		offset:    size,
		size:      size,
	}
}
// Scan advances the scanner to the previous line, which will then be available
// via the Bytes method. It returns false when there are no more lines.
// Empty lines are skipped; after Scan returns false, Err reports any
// non-EOF error encountered while reading.
func (s *BackScanner) Scan() bool {
	if s.err != nil {
		return false
	}

	// Read chunks until a newline is found or the file is fully read
	for {
		// Check if there's a line in the buffer
		if idx := bytes.LastIndexByte(s.buffer, '\n'); idx >= 0 {
			// Everything after the last newline is the next backward line.
			s.line = s.buffer[idx+1:]
			s.buffer = s.buffer[:idx]
			if len(s.line) > 0 {
				return true
			}
			// Empty line: keep splitting what is already buffered.
			continue
		}
		for {
			if s.offset <= 0 {
				// No more data to read; check remaining buffer
				if len(s.buffer) > 0 {
					// Whatever is left is the first line of the file.
					s.line = s.buffer
					s.buffer = nil
					return true
				}
				return false
			}
			// Read the chunk that ends at the current offset; the final
			// chunk near the start of the file may be shorter than chunkSize.
			newOffset := max(0, s.offset-int64(s.chunkSize))
			chunkSize := s.offset - newOffset
			chunk := make([]byte, chunkSize)
			n, err := s.file.ReadAt(chunk, newOffset)
			if err != nil && err != io.EOF {
				s.err = err
				return false
			}
			// Prepend the chunk to the buffer
			s.buffer = append(chunk[:n], s.buffer...)
			s.offset = newOffset
			// Check for newline in the updated buffer
			if idx := bytes.LastIndexByte(s.buffer, '\n'); idx >= 0 {
				s.line = s.buffer[idx+1:]
				s.buffer = s.buffer[:idx]
				if len(s.line) > 0 {
					return true
				}
				// Found only an empty line; return to the outer loop to
				// keep consuming the buffered data.
				break
			}
		}
	}
}
// Bytes returns the line found by the latest successful call to Scan.
func (s *BackScanner) Bytes() (line []byte) {
	line = s.line
	return
}
// FileSize returns the total size in bytes of the underlying file,
// as measured when the scanner was created.
func (s *BackScanner) FileSize() (n int64) {
	n = s.size
	return
}
// Err returns the first non-EOF error encountered by the scanner.
func (s *BackScanner) Err() (err error) {
	err = s.err
	return
}

View file

@ -0,0 +1,127 @@
package accesslog
import (
"fmt"
"strings"
"testing"
)
// TestBackScanner verifies backward line iteration over a variety of
// file shapes: empty, with/without trailing newline, long lines, and
// runs of blank lines.
func TestBackScanner(t *testing.T) {
	cases := []struct {
		name string
		data string
		want []string
	}{
		{name: "empty file", data: "", want: []string{}},
		{name: "single line without newline", data: "single line", want: []string{"single line"}},
		{name: "single line with newline", data: "single line\n", want: []string{"single line"}},
		{name: "multiple lines", data: "first\nsecond\nthird\n", want: []string{"third", "second", "first"}},
		{name: "multiple lines without final newline", data: "first\nsecond\nthird", want: []string{"third", "second", "first"}},
		{name: "lines longer than chunk size", data: "short\n" + strings.Repeat("a", 20) + "\nshort\n", want: []string{"short", strings.Repeat("a", 20), "short"}},
		{name: "empty lines", data: "first\n\n\nlast\n", want: []string{"last", "first"}},
	}
	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			// Populate the mock file with the test input.
			mockFile := &MockFile{}
			if _, err := mockFile.Write([]byte(tc.data)); err != nil {
				t.Fatalf("failed to write to mock file: %v", err)
			}

			// A deliberately small chunk size exercises the chunked
			// backward-read path.
			scanner := NewBackScanner(mockFile, 10)
			var got [][]byte
			for scanner.Scan() {
				got = append(got, scanner.Bytes())
			}
			if err := scanner.Err(); err != nil {
				t.Errorf("scanner error: %v", err)
			}

			if len(got) != len(tc.want) {
				t.Errorf("got %d lines, want %d lines", len(got), len(tc.want))
				return
			}
			for i, line := range got {
				if string(line) != tc.want[i] {
					t.Errorf("line %d: got %q, want %q", i, line, tc.want[i])
				}
			}
		})
	}
}
// TestBackScannerWithVaryingChunkSizes checks that the scanner yields
// identical output regardless of the chunk size, including sizes smaller
// than one line and larger than the whole file.
func TestBackScannerWithVaryingChunkSizes(t *testing.T) {
	const input = "first\nsecond\nthird\nfourth\nfifth\n"
	expected := []string{"fifth", "fourth", "third", "second", "first"}

	for _, size := range []int{1, 2, 3, 5, 10, 20, 100} {
		t.Run(fmt.Sprintf("chunk_size_%d", size), func(t *testing.T) {
			mockFile := &MockFile{}
			if _, err := mockFile.Write([]byte(input)); err != nil {
				t.Fatalf("failed to write to mock file: %v", err)
			}

			scanner := NewBackScanner(mockFile, size)
			var got [][]byte
			for scanner.Scan() {
				got = append(got, scanner.Bytes())
			}
			if err := scanner.Err(); err != nil {
				t.Errorf("scanner error: %v", err)
			}

			if len(got) != len(expected) {
				t.Errorf("got %d lines, want %d lines", len(got), len(expected))
				return
			}
			for i, line := range got {
				if string(line) != expected[i] {
					t.Errorf("chunk size %d, line %d: got %q, want %q",
						size, i, line, expected[i])
				}
			}
		})
	}
}

View file

@ -0,0 +1,119 @@
package accesslog
import (
"bytes"
"io"
"time"
)
// rotate truncates the log file according to the configured retention
// policy, keeping either the newest N lines ("last") or only lines newer
// than the cutoff date ("days"). It is a no-op when no policy is set.
func (l *AccessLogger) rotate() (err error) {
	// Get retention configuration
	config := l.Config().Retention

	// shouldKeep reports whether a line (parsed time t, 1-based count from
	// the end of the file) is retained under the active policy.
	var shouldKeep func(t time.Time, lineCount int) bool
	if config.Last > 0 {
		shouldKeep = func(_ time.Time, lineCount int) bool {
			return lineCount <= int(config.Last)
		}
	} else if config.Days > 0 {
		cutoff := time.Now().AddDate(0, 0, -int(config.Days))
		shouldKeep = func(t time.Time, _ int) bool {
			return !t.IsZero() && !t.Before(cutoff)
		}
	} else {
		return nil // No retention policy set
	}

	// Scan backward from the end, accumulating how many trailing bytes
	// to keep. The byte count is added only AFTER a line passes the
	// policy check, so the first rejected line is excluded. (Previously
	// the length was added before the check, which retained one extra
	// stale line under the "days" policy.)
	s := NewBackScanner(l.io, defaultChunkSize)
	nRead := 0
	nLines := 0
	for s.Scan() {
		nLines++
		if !shouldKeep(ParseLogTime(s.Bytes()), nLines) {
			break
		}
		nRead += len(s.Bytes()) + 1 // +1 for the newline terminator
	}
	if s.Err() != nil {
		return s.Err()
	}

	// Copy the retained tail and rewrite the file with it.
	beg := int64(nRead)
	if _, err := l.io.Seek(-beg, io.SeekEnd); err != nil {
		return err
	}
	buf := make([]byte, nRead)
	if _, err := l.io.Read(buf); err != nil {
		return err
	}

	if err := l.writeTruncate(buf); err != nil {
		return err
	}
	return nil
}
// writeTruncate rewrites the log file so it contains exactly buf: it
// seeks to the start, writes buf through the buffered writer, flushes,
// and truncates the file to the number of bytes actually written.
// Returns io.ErrShortWrite when fewer than len(buf) bytes made it out.
func (l *AccessLogger) writeTruncate(buf []byte) (err error) {
	// Seek to beginning and truncate
	if _, err := l.io.Seek(0, io.SeekStart); err != nil {
		return err
	}

	// Write buffer back to file
	nWritten, err := l.buffered.Write(buf)
	if err != nil {
		return err
	}
	if err = l.buffered.Flush(); err != nil {
		return err
	}

	// Truncate file to what was actually written.
	if err = l.io.Truncate(int64(nWritten)); err != nil {
		return err
	}

	// check bytes written == buffer size
	if nWritten != len(buf) {
		return io.ErrShortWrite
	}
	return
}
// timeLen is the byte length of the JSON time-field marker below.
const timeLen = len(`"time":"`)

// timeJSON marks the start of the "time" field in JSON-formatted log lines.
var timeJSON = []byte(`"time":"`)
// ParseLogTime extracts the timestamp from a single log line. It handles
// both the JSON format (a `"time":"..."` field) and the Common/Combined
// format (a bracketed timestamp). The zero time is returned when no
// timestamp can be located or parsed.
func ParseLogTime(line []byte) (t time.Time) {
	if len(line) == 0 {
		return
	}

	// JSON format: take exactly len(LogTimeFormat) bytes following the
	// `"time":"` marker.
	if i := bytes.Index(line, timeJSON); i != -1 {
		begin := i + timeLen
		end := begin + len(LogTimeFormat)
		if len(line) < end {
			return
		}
		t, _ = time.Parse(LogTimeFormat, string(line[begin:end]))
		return
	}

	// Common/Combined format
	// Format: <virtual host> <host ip> - - [02/Jan/2006:15:04:05 -0700] ...
	lb := bytes.IndexByte(line, '[')
	if lb == -1 {
		return
	}
	rb := bytes.IndexByte(line[lb:], ']')
	if rb == -1 {
		return
	}
	rb += lb // adjust position relative to the full line
	t, _ = time.Parse(LogTimeFormat, string(line[lb+1:rb])) // ignore error
	return
}

View file

@ -0,0 +1,86 @@
package accesslog_test
import (
"fmt"
"testing"
"time"
. "github.com/yusing/go-proxy/internal/net/gphttp/accesslog"
"github.com/yusing/go-proxy/internal/task"
"github.com/yusing/go-proxy/internal/utils/strutils"
. "github.com/yusing/go-proxy/internal/utils/testing"
)
// TestParseLogTime checks timestamp extraction for both supported log
// line formats: JSON and Common/Combined.
func TestParseLogTime(t *testing.T) {
	templates := []string{
		`{"foo":"bar","time":"%s","bar":"baz"}`,
		`example.com 192.168.1.1 - - [%s] "GET / HTTP/1.1" 200 1234`,
	}
	want := time.Date(2024, 1, 2, 3, 4, 5, 0, time.UTC)
	stamp := want.Format(LogTimeFormat)

	for _, tmpl := range templates {
		line := fmt.Sprintf(tmpl, stamp)
		t.Run(line, func(t *testing.T) {
			got := ParseLogTime([]byte(line))
			ExpectTrue(t, got.Equal(want))
		})
	}
}
// TestRetentionCommonFormat exercises both retention policies ("last N"
// and "N days") end-to-end through Rotate on a mock file.
func TestRetentionCommonFormat(t *testing.T) {
	var file MockFile
	logger := NewAccessLogger(task.RootTask("test", false), &file, &Config{
		Format:     FormatCommon,
		BufferSize: 1024,
	})
	for range 10 {
		logger.Log(req, resp)
	}
	logger.Flush()
	// test.Finish(nil)

	// No retention configured yet: all 10 lines must be present.
	ExpectEqual(t, logger.Config().Retention, nil)
	ExpectTrue(t, file.Len() > 0)
	ExpectEqual(t, file.LineCount(), 10)

	t.Run("keep last", func(t *testing.T) {
		logger.Config().Retention = strutils.MustParse[*Retention]("last 5")
		ExpectEqual(t, logger.Config().Retention.Days, 0)
		ExpectEqual(t, logger.Config().Retention.Last, 5)
		ExpectNoError(t, logger.Rotate())
		ExpectEqual(t, file.LineCount(), 5)
	})

	_ = file.Truncate(0)
	timeNow := time.Now()
	for i := range 10 {
		// Backdate each line so the file spans the last 10 days.
		logger.Formatter.(*CommonFormatter).GetTimeNow = func() time.Time {
			return timeNow.AddDate(0, 0, -10+i)
		}
		logger.Log(req, resp)
	}
	logger.Flush()
	ExpectEqual(t, file.LineCount(), 10)

	t.Run("keep days", func(t *testing.T) {
		logger.Config().Retention = strutils.MustParse[*Retention]("3 days")
		ExpectEqual(t, logger.Config().Retention.Days, 3)
		ExpectEqual(t, logger.Config().Retention.Last, 0)
		ExpectNoError(t, logger.Rotate())
		ExpectEqual(t, file.LineCount(), 3)
		rotated := string(file.Content())

		_ = file.Truncate(0)
		for i := range 3 {
			logger.Formatter.(*CommonFormatter).GetTimeNow = func() time.Time {
				return timeNow.AddDate(0, 0, -3+i)
			}
			logger.Log(req, resp)
		}
		// Flush before comparing: every other Log sequence in this test is
		// followed by an explicit Flush, and with BufferSize 1024 the new
		// lines may otherwise still sit in the write buffer, making the
		// comparison see stale (empty) file content.
		logger.Flush()
		ExpectEqual(t, rotated, string(file.Content()))
	})
}