This commit is contained in:
@@ -14,6 +14,7 @@ import (
|
||||
|
||||
// Tuning constants for the send-queue processor.
const (
	// maxRetriesPerServer caps the failures tolerated for a single server;
	// attemptSendJob skips a server whose counter has reached this value.
	maxRetriesPerServer = 3

	// defaultSendTimeout is the job TTL in seconds (24 hours), applied when
	// job.Timeout is zero or negative.
	defaultSendTimeout = 3600 * 24

	// defaultPostTimeout is handed to meowlib.HttpPostMessage as the timeout
	// of each POST (presumably seconds — confirm against meowlib).
	defaultPostTimeout = 200
)
|
||||
|
||||
// WriteSendJob enqueues a SendJob from the main Flutter isolate.
|
||||
// It is a thin wrapper over client.PushSendJob and is safe to call
|
||||
@@ -61,10 +62,14 @@ func ProcessSendQueues(storagePath string) int {
|
||||
// It returns the number of successfully sent messages.
|
||||
//
|
||||
// For each pending job it will:
|
||||
// - immediately mark it failed if its timeout has elapsed
|
||||
// - immediately mark it failed if its TTL (job.Timeout) has elapsed – this is the
|
||||
// only criterion for permanent failure; retry exhaustion is never a failure cause
|
||||
// - attempt delivery, cycling through servers until one succeeds
|
||||
// - mark it sent on success or failed when all servers are exhausted
|
||||
// - stop and return when a job still has retries left (will resume on next call)
|
||||
// - mark it sent on success
|
||||
// - stop and return when all servers fail this run (will resume on next call)
|
||||
//
|
||||
// Per-server retry counts (maxRetriesPerServer) are local to each call so that
|
||||
// past failures in previous runs never prevent future delivery attempts.
|
||||
func processSendQueue(storagePath, queue string) int {
|
||||
sent := 0
|
||||
for {
|
||||
@@ -77,8 +82,13 @@ func processSendQueue(storagePath, queue string) int {
|
||||
return sent // no more pending jobs
|
||||
}
|
||||
|
||||
// Hard timeout: job has been sitting too long
|
||||
if job.Timeout > 0 && time.Since(job.InsertedAt) > time.Duration(job.Timeout)*time.Second {
|
||||
// Hard timeout: the only criterion for permanent failure.
|
||||
// Use defaultSendTimeout when the job carries no explicit TTL.
|
||||
ttl := job.Timeout
|
||||
if ttl <= 0 {
|
||||
ttl = defaultSendTimeout
|
||||
}
|
||||
if time.Since(job.InsertedAt) > time.Duration(ttl)*time.Second {
|
||||
job.Status = client.SendStatusFailed
|
||||
if err := client.UpdateSendJob(storagePath, queue, job); err != nil {
|
||||
logger.Error().Err(err).Int64("id", job.ID).Msg("processSendQueue: UpdateSendJob timeout")
|
||||
@@ -86,7 +96,10 @@ func processSendQueue(storagePath, queue string) int {
|
||||
continue // try the next pending job
|
||||
}
|
||||
|
||||
serverIdx, sendErr := attemptSendJob(job)
|
||||
// runRetries is allocated fresh every call so it never accumulates
|
||||
// across processSendQueue invocations.
|
||||
runRetries := make([]int, len(job.Servers))
|
||||
serverIdx, sendErr := attemptSendJob(job, runRetries)
|
||||
if sendErr == nil {
|
||||
now := time.Now().UTC()
|
||||
job.Status = client.SendStatusSent
|
||||
@@ -99,47 +112,31 @@ func processSendQueue(storagePath, queue string) int {
|
||||
continue // job delivered – look for the next one
|
||||
}
|
||||
|
||||
// Persist updated retry counts regardless of outcome
|
||||
if err := client.UpdateSendJob(storagePath, queue, job); err != nil {
|
||||
logger.Error().Err(err).Int64("id", job.ID).Msg("processSendQueue: UpdateSendJob retries")
|
||||
}
|
||||
|
||||
if allServersExhausted(job) {
|
||||
job.Status = client.SendStatusFailed
|
||||
if err := client.UpdateSendJob(storagePath, queue, job); err != nil {
|
||||
logger.Error().Err(err).Int64("id", job.ID).Msg("processSendQueue: UpdateSendJob failed")
|
||||
}
|
||||
continue // all servers dead for this job – try the next one
|
||||
}
|
||||
|
||||
// Job still has remaining retries on some server; stop and wait for the next poll
|
||||
// All servers failed this run; stop and wait for the next poll.
|
||||
// Permanent failure is decided solely by the TTL check above.
|
||||
return sent
|
||||
}
|
||||
}
|
||||
|
||||
// attemptSendJob reads the pre-built packed message from job.File and tries
|
||||
// each server in order, skipping any server that has already reached
|
||||
// maxRetriesPerServer failures.
|
||||
// maxRetriesPerServer failures within the current run.
|
||||
// On the first successful POST it returns the server index.
|
||||
// All retry counts are incremented in-place inside job.Retries.
|
||||
func attemptSendJob(job *client.SendJob) (int, error) {
|
||||
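// Retry counts are tracked in the caller-supplied retries slice (run-local,
|
||||
// never persisted) so that previous runs do not influence this attempt.
// NOTE(review): the only caller visible here passes a freshly zeroed slice and
// each server is visited once per call, so the maxRetriesPerServer skip below
// appears unreachable — confirm whether an intra-run retry loop was intended.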
|
||||
func attemptSendJob(job *client.SendJob, retries []int) (int, error) {
|
||||
data, err := os.ReadFile(job.File)
|
||||
if err != nil {
|
||||
return -1, err
|
||||
}
|
||||
// Ensure the retries slice is aligned with the servers slice
|
||||
for len(job.Retries) < len(job.Servers) {
|
||||
job.Retries = append(job.Retries, 0)
|
||||
}
|
||||
|
||||
timeout := job.Timeout
|
||||
if timeout <= 0 {
|
||||
timeout = defaultSendTimeout
|
||||
// Ensure the retries slice is aligned with the servers slice.
|
||||
for len(retries) < len(job.Servers) {
|
||||
retries = append(retries, 0)
|
||||
}
|
||||
|
||||
for i, srv := range job.Servers {
|
||||
if job.Retries[i] >= maxRetriesPerServer {
|
||||
continue // this server is exhausted
|
||||
if retries[i] >= maxRetriesPerServer {
|
||||
continue // this server is exhausted for the current run
|
||||
}
|
||||
|
||||
// Unmarshal the stored PackedUserMessage and wrap it for this server.
|
||||
@@ -150,31 +147,17 @@ func attemptSendJob(job *client.SendJob) (int, error) {
|
||||
serverData, errTxt, packErr := PackMessageForServer(packedUsrMsg, srv.GetUid())
|
||||
if packErr != nil {
|
||||
logger.Error().Err(packErr).Str("errTxt", errTxt).Str("url", srv.Url).Msg("attemptSendJob: PackMessageForServer")
|
||||
job.Retries[i]++
|
||||
retries[i]++
|
||||
continue
|
||||
}
|
||||
|
||||
_, err = meowlib.HttpPostMessage(srv.Url, serverData, timeout)
|
||||
_, err = meowlib.HttpPostMessage(srv.Url, serverData, defaultPostTimeout)
|
||||
if err != nil {
|
||||
logger.Warn().Err(err).Str("url", srv.Url).Int("retry", job.Retries[i]+1).Msg("attemptSendJob: POST failed")
|
||||
job.Retries[i]++
|
||||
logger.Warn().Err(err).Str("url", srv.Url).Int("retry", retries[i]+1).Msg("attemptSendJob: POST failed")
|
||||
retries[i]++
|
||||
continue
|
||||
}
|
||||
return i, nil
|
||||
}
|
||||
return -1, errors.New("all servers failed or exhausted")
|
||||
}
|
||||
|
||||
// allServersExhausted returns true when every server in the job has been tried
|
||||
// maxRetriesPerServer times without success.
|
||||
func allServersExhausted(job *client.SendJob) bool {
|
||||
if len(job.Servers) == 0 {
|
||||
return true
|
||||
}
|
||||
for i := range job.Servers {
|
||||
if i >= len(job.Retries) || job.Retries[i] < maxRetriesPerServer {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user