From 9a964c8cb537ce62ae6dc8443028134ed79f919c Mon Sep 17 00:00:00 2001 From: Thomas Orozco Date: Wed, 10 Jan 2018 13:37:52 +0100 Subject: [PATCH] Warn when a job should start but is still running This will make it easier for users to debug hung jobs: rather than requiring users to go back to when the job started to realize (implicitly) that it's not running, this will warn at every interval that something is wrong. --- cron/cron.go | 33 ++++++++++++++++++++++++++++----- integration/test.bats | 13 +++++++++---- integration/timeout.crontab | 2 +- 3 files changed, 38 insertions(+), 10 deletions(-) diff --git a/cron/cron.go b/cron/cron.go index 20f7337..4b602f6 100644 --- a/cron/cron.go +++ b/cron/cron.go @@ -2,6 +2,7 @@ package cron import ( "bufio" + "context" "fmt" "github.com/aptible/supercronic/crontab" "github.com/sirupsen/logrus" @@ -59,17 +60,17 @@ func startReaderDrain(wg *sync.WaitGroup, readerLogger *logrus.Entry, reader io. }() } -func runJob(context *crontab.Context, command string, jobLogger *logrus.Entry) error { +func runJob(cronCtx *crontab.Context, command string, jobLogger *logrus.Entry) error { jobLogger.Info("starting") - cmd := exec.Command(context.Shell, "-c", command) + cmd := exec.Command(cronCtx.Shell, "-c", command) // Run in a separate process group so that in interactive usage, CTRL+C // stops supercronic, not the children threads. cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true} env := os.Environ() - for k, v := range context.Environ { + for k, v := range cronCtx.Environ { env = append(env, fmt.Sprintf("%s=%s", k, v)) } cmd.Env = env @@ -105,7 +106,22 @@ func runJob(context *crontab.Context, command string, jobLogger *logrus.Entry) e return nil } -func StartJob(wg *sync.WaitGroup, context *crontab.Context, job *crontab.Job, exitChan chan interface{}, cronLogger *logrus.Entry) { +func monitorJob(ctx context.Context, expression crontab.Expression, t0 time.Time, jobLogger *logrus.Entry) { + t := t0 + + for { + t = expression.Next(t) + + select { + case <-time.After(time.Until(t)): + jobLogger.Warnf("not starting: job is still running since %s (%s elapsed)", t0, t.Sub(t0)) + case <-ctx.Done(): + return + } + } +} + +func StartJob(wg *sync.WaitGroup, cronCtx *crontab.Context, job *crontab.Job, exitChan chan interface{}, cronLogger *logrus.Entry) { wg.Add(1) go func() { @@ -139,7 +155,14 @@ func StartJob(wg *sync.WaitGroup, context *crontab.Context, job *crontab.Job, ex "iteration": cronIteration, }) - err := runJob(context, job.Command, jobLogger) + err := func() error { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + go monitorJob(ctx, job.Expression, nextRun, jobLogger) + + return runJob(cronCtx, job.Command, jobLogger) + }() if err == nil { jobLogger.Info("job succeeded") diff --git a/integration/test.bats b/integration/test.bats index cd5710d..2d4ff61 100644 --- a/integration/test.bats +++ b/integration/test.bats @@ -1,12 +1,12 @@ function run_supercronic() { local crontab="$1" - local timeout="${2:-2s}" + local timeout="${2:-1s}" timeout --preserve-status --kill-after "30s" "$timeout" \ "${BATS_TEST_DIRNAME}/../supercronic" ${SUPERCRONIC_ARGS:-} "$crontab" 2>&1 } @test "it starts" { - run_supercronic "${BATS_TEST_DIRNAME}/noop.crontab" 2s + run_supercronic "${BATS_TEST_DIRNAME}/noop.crontab" } @test "it runs a cron job" { @@ -23,11 +23,16 @@ function run_supercronic() { } @test "it warns when USER is set" { - run_supercronic "${BATS_TEST_DIRNAME}/user.crontab" 5s | grep -iE "processes will not.*USER=" + run_supercronic "${BATS_TEST_DIRNAME}/user.crontab" 1s | grep -iE "processes will not.*USER=" } @test "it warns when a job is falling behind" { - run_supercronic "${BATS_TEST_DIRNAME}/timeout.crontab" 5s | grep -iE "job took too long to run" + run_supercronic "${BATS_TEST_DIRNAME}/timeout.crontab" 1s | grep -iE "job took too long to run" +} + +@test "it warns repeatedly when a job is still running" { + n="$(run_supercronic "${BATS_TEST_DIRNAME}/timeout.crontab" 1s | grep -iE "job is still running" | wc -l)" + [[ "$n" -eq 2 ]] } @test "it supports debug logging " { diff --git a/integration/timeout.crontab b/integration/timeout.crontab index 57eab25..bbaa2a9 100644 --- a/integration/timeout.crontab +++ b/integration/timeout.crontab @@ -1 +1 @@ -* * * * * * * sleep 2 +* * * * * * * sleep 2.5