mirror of
https://github.com/aptible/supercronic.git
synced 2025-09-26 20:31:17 +08:00
Warn when a job should start but is still running
This will make it easier for users to debug hung jobs: rather than requiring users to go back to when the job started to realize (implicitly) that it's not running, this will warn at every interval that something is wrong.
This commit is contained in:
33
cron/cron.go
33
cron/cron.go
@@ -2,6 +2,7 @@ package cron
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"context"
|
||||
"fmt"
|
||||
"github.com/aptible/supercronic/crontab"
|
||||
"github.com/sirupsen/logrus"
|
||||
@@ -59,17 +60,17 @@ func startReaderDrain(wg *sync.WaitGroup, readerLogger *logrus.Entry, reader io.
|
||||
}()
|
||||
}
|
||||
|
||||
func runJob(context *crontab.Context, command string, jobLogger *logrus.Entry) error {
|
||||
func runJob(cronCtx *crontab.Context, command string, jobLogger *logrus.Entry) error {
|
||||
jobLogger.Info("starting")
|
||||
|
||||
cmd := exec.Command(context.Shell, "-c", command)
|
||||
cmd := exec.Command(cronCtx.Shell, "-c", command)
|
||||
|
||||
// Run in a separate process group so that in interactive usage, CTRL+C
|
||||
// stops supercronic, not the children threads.
|
||||
cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true}
|
||||
|
||||
env := os.Environ()
|
||||
for k, v := range context.Environ {
|
||||
for k, v := range cronCtx.Environ {
|
||||
env = append(env, fmt.Sprintf("%s=%s", k, v))
|
||||
}
|
||||
cmd.Env = env
|
||||
@@ -105,7 +106,22 @@ func runJob(context *crontab.Context, command string, jobLogger *logrus.Entry) e
|
||||
return nil
|
||||
}
|
||||
|
||||
func StartJob(wg *sync.WaitGroup, context *crontab.Context, job *crontab.Job, exitChan chan interface{}, cronLogger *logrus.Entry) {
|
||||
func monitorJob(ctx context.Context, expression crontab.Expression, t0 time.Time, jobLogger *logrus.Entry) {
|
||||
t := t0
|
||||
|
||||
for {
|
||||
t = expression.Next(t)
|
||||
|
||||
select {
|
||||
case <-time.After(time.Until(t)):
|
||||
jobLogger.Warnf("not starting: job is still running since %s (%s elapsed)", t0, t.Sub(t0))
|
||||
case <-ctx.Done():
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func StartJob(wg *sync.WaitGroup, cronCtx *crontab.Context, job *crontab.Job, exitChan chan interface{}, cronLogger *logrus.Entry) {
|
||||
wg.Add(1)
|
||||
|
||||
go func() {
|
||||
@@ -139,7 +155,14 @@ func StartJob(wg *sync.WaitGroup, context *crontab.Context, job *crontab.Job, ex
|
||||
"iteration": cronIteration,
|
||||
})
|
||||
|
||||
err := runJob(context, job.Command, jobLogger)
|
||||
err := func() error {
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
defer cancel()
|
||||
|
||||
go monitorJob(ctx, job.Expression, nextRun, jobLogger)
|
||||
|
||||
return runJob(cronCtx, job.Command, jobLogger)
|
||||
}()
|
||||
|
||||
if err == nil {
|
||||
jobLogger.Info("job succeeded")
|
||||
|
@@ -1,12 +1,12 @@
|
||||
function run_supercronic() {
|
||||
local crontab="$1"
|
||||
local timeout="${2:-2s}"
|
||||
local timeout="${2:-1s}"
|
||||
timeout --preserve-status --kill-after "30s" "$timeout" \
|
||||
"${BATS_TEST_DIRNAME}/../supercronic" ${SUPERCRONIC_ARGS:-} "$crontab" 2>&1
|
||||
}
|
||||
|
||||
@test "it starts" {
|
||||
run_supercronic "${BATS_TEST_DIRNAME}/noop.crontab" 2s
|
||||
run_supercronic "${BATS_TEST_DIRNAME}/noop.crontab"
|
||||
}
|
||||
|
||||
@test "it runs a cron job" {
|
||||
@@ -23,11 +23,16 @@ function run_supercronic() {
|
||||
}
|
||||
|
||||
@test "it warns when USER is set" {
|
||||
run_supercronic "${BATS_TEST_DIRNAME}/user.crontab" 5s | grep -iE "processes will not.*USER="
|
||||
run_supercronic "${BATS_TEST_DIRNAME}/user.crontab" 1s | grep -iE "processes will not.*USER="
|
||||
}
|
||||
|
||||
@test "it warns when a job is falling behind" {
|
||||
run_supercronic "${BATS_TEST_DIRNAME}/timeout.crontab" 5s | grep -iE "job took too long to run"
|
||||
run_supercronic "${BATS_TEST_DIRNAME}/timeout.crontab" 1s | grep -iE "job took too long to run"
|
||||
}
|
||||
|
||||
@test "it warns repeatedly when a job is still running" {
|
||||
n="$(run_supercronic "${BATS_TEST_DIRNAME}/timeout.crontab" 1s | grep -iE "job is still running" | wc -l)"
|
||||
[[ "$n" -eq 2 ]]
|
||||
}
|
||||
|
||||
@test "it supports debug logging " {
|
||||
|
@@ -1 +1 @@
|
||||
* * * * * * * sleep 2
|
||||
* * * * * * * sleep 2.5
|
||||
|
Reference in New Issue
Block a user