Files
meowlib/lokiwriter.go
ycc c784f6f315
Some checks failed
continuous-integration/drone/push Build is failing
loki disable if non valid url
2026-02-09 19:06:47 +01:00

183 lines
4.2 KiB
Go

package meowlib
import (
"bytes"
"encoding/json"
"fmt"
"net/http"
"net/url"
"sync"
"time"
)
// LokiWriter forwards log events to a Loki endpoint over HTTP and protects
// the caller with a simple circuit breaker when the endpoint keeps failing.
type LokiWriter struct {
	// url is the endpoint every log entry is POSTed to.
	url string
	// labels are copied into the stream labels of every pushed entry.
	labels map[string]string
	// httpClient performs the push requests.
	httpClient *http.Client
	// disabled is decided once at construction time (empty or invalid URL);
	// when set, Write drops everything without touching the network.
	disabled bool

	// Circuit breaker state. mu guards all fields below.
	mu sync.RWMutex
	// failureCount is the number of failures recorded since the last
	// successful push or circuit reset.
	failureCount int
	// circuitOpen is true while pushes are being skipped.
	circuitOpen bool
	// lastFailureTime is when the most recent failure was recorded.
	lastFailureTime time.Time
	// lastWarningTime is when the most recent warning was printed; it is
	// used to rate-limit warnings.
	lastWarningTime time.Time
}
// LokiPayload is the JSON body sent on each push: a list of streams under
// the "streams" key.
type LokiPayload struct {
	// Streams holds the streams carried by this push request.
	Streams []LokiStream `json:"streams"`
}
// LokiStream is one labelled stream inside a LokiPayload.
type LokiStream struct {
	// Stream is the label set identifying the stream.
	Stream map[string]string `json:"stream"`
	// Values lists the entries of the stream; the writer fills each entry
	// as a two-element slice: timestamp string, then log line.
	Values [][]string `json:"values"`
}
// Circuit breaker configuration.
const (
	// maxFailures is the failure count at which the circuit opens; the
	// count is reset by a successful push.
	maxFailures = 3
	// circuitOpenTime is how long the circuit stays open before a push is
	// attempted again.
	circuitOpenTime = time.Minute * 5
	// warningInterval is the minimum delay between two warning messages.
	warningInterval = time.Minute
)
// lokiRequestTimeout bounds a single push request. Write is called
// synchronously by the logger, so without a bound a hung endpoint would
// block every logging call indefinitely.
const lokiRequestTimeout = 5 * time.Second

// NewLokiWriter returns a writer that POSTs log entries to rawURL, attaching
// labels to every entry.
//
// If rawURL is empty, cannot be parsed as an absolute request URI, does not
// use the http or https scheme, or has no host, the returned writer is
// disabled: it accepts writes and discards them without any network access.
// The labels map is stored as given, not copied.
func NewLokiWriter(rawURL string, labels map[string]string) *LokiWriter {
	return &LokiWriter{
		url:        rawURL,
		labels:     labels,
		httpClient: &http.Client{Timeout: lokiRequestTimeout},
		disabled:   !isUsableLokiURL(rawURL),
	}
}

// isUsableLokiURL reports whether rawURL is a non-empty http(s) URL with a host.
func isUsableLokiURL(rawURL string) bool {
	if rawURL == "" {
		return false
	}
	u, err := url.ParseRequestURI(rawURL)
	if err != nil {
		return false
	}
	return (u.Scheme == "http" || u.Scheme == "https") && u.Host != ""
}
// Write pushes one JSON-encoded log event to Loki.
//
// p is expected to be a single JSON object; its "level" and "message" string
// fields are extracted (missing or non-string values become ""). The level is
// added to the writer's labels and the message is sent as the log line with
// the current time as timestamp.
//
// Write is best-effort: it always returns len(p) and a nil error so that a
// failing or unreachable Loki never breaks the caller's logging. Input that
// is not valid JSON is dropped silently. Transport and HTTP failures are
// reported through recordFailure, which drives the circuit breaker; while
// the circuit is open, events are discarded without any network access.
func (w *LokiWriter) Write(p []byte) (n int, err error) {
	if w.disabled {
		return len(p), nil
	}
	if !w.circuitAllowsWrite() {
		// Circuit is open: silently discard the log to avoid spam.
		return len(p), nil
	}

	// Decode the event to extract the log level and message.
	var event map[string]interface{}
	if err := json.Unmarshal(p, &event); err != nil {
		// Don't fail on unmarshal errors, just silently drop the event.
		return len(p), nil
	}
	level, _ := event["level"].(string)
	message, _ := event["message"].(string)

	// Per-event label set: the writer's static labels plus the level.
	labels := make(map[string]string, len(w.labels)+1)
	for k, v := range w.labels {
		labels[k] = v
	}
	labels["level"] = level

	// Full-resolution Unix time in nanoseconds, as a decimal string, so
	// that entries written within the same millisecond keep distinct,
	// ordered timestamps.
	timestamp := fmt.Sprintf("%d", time.Now().UnixNano())

	payload := LokiPayload{
		Streams: []LokiStream{{
			Stream: labels,
			Values: [][]string{{timestamp, message}},
		}},
	}
	payloadBytes, err := json.Marshal(payload)
	if err != nil {
		// Don't fail on marshal errors.
		return len(p), nil
	}

	req, err := http.NewRequest(http.MethodPost, w.url, bytes.NewReader(payloadBytes))
	if err != nil {
		w.recordFailure(fmt.Sprintf("failed to create HTTP request: %v", err))
		return len(p), nil
	}
	req.Header.Set("Content-Type", "application/json")

	resp, err := w.httpClient.Do(req)
	if err != nil {
		w.recordFailure(fmt.Sprintf("failed to send log to Loki: %v", err))
		return len(p), nil
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusNoContent {
		w.recordFailure(fmt.Sprintf("received non-204 response from Loki: %d", resp.StatusCode))
		return len(p), nil
	}

	// Success - reset the failure count.
	w.mu.Lock()
	if w.failureCount > 0 {
		// Earlier pushes were failing but the endpoint has recovered.
		fmt.Printf("LokiWriter: connection restored to %s\n", w.url)
		w.failureCount = 0
	}
	w.mu.Unlock()
	return len(p), nil
}

// circuitAllowsWrite reports whether a push may be attempted now. If the
// circuit is open and circuitOpenTime has elapsed since the last failure, it
// closes the circuit and resets the failure count. The check and the reset
// happen in one critical section so concurrent writers cannot interleave
// between them and wipe failures recorded after the reset.
func (w *LokiWriter) circuitAllowsWrite() bool {
	w.mu.Lock()
	defer w.mu.Unlock()

	if !w.circuitOpen {
		return true
	}
	if time.Since(w.lastFailureTime) < circuitOpenTime {
		return false
	}
	// Time to retry: close the circuit and start counting afresh.
	w.circuitOpen = false
	w.failureCount = 0
	return true
}
// recordFailure registers a failed push described by errMsg and opens the
// circuit once maxFailures failures have accumulated.
//
// Per-failure warnings are rate-limited to one per warningInterval. The
// "circuit breaker opened" message is always printed: the transition can
// happen at most once per circuitOpenTime, and suppressing it would disable
// remote logging without any notice. Failures recorded while the circuit is
// already open (e.g. from requests that were in flight) only update the
// counters and print nothing.
func (w *LokiWriter) recordFailure(errMsg string) {
	w.mu.Lock()
	defer w.mu.Unlock()

	now := time.Now()
	w.failureCount++
	w.lastFailureTime = now

	if w.circuitOpen {
		return
	}

	if w.failureCount >= maxFailures {
		w.circuitOpen = true
		fmt.Printf("LokiWriter: circuit breaker opened after %d failures (last error: %s). Remote logging disabled for %v.\n",
			w.failureCount, errMsg, circuitOpenTime)
		w.lastWarningTime = now
		return
	}

	// Only print a warning if enough time has passed since the last one.
	if now.Sub(w.lastWarningTime) >= warningInterval {
		fmt.Printf("LokiWriter: warning - %s (failure %d/%d)\n", errMsg, w.failureCount, maxFailures)
		w.lastWarningTime = now
	}
}