diff --git a/README.md b/README.md index 04ff58f..189500a 100644 --- a/README.md +++ b/README.md @@ -30,6 +30,7 @@ http { shm_name = "timer_shm", -- shm to use for node-wide timers key_name = "my_key", -- key-name to use for node-wide timers sub_interval = 0.1, -- max cross worker extra delay + max_use = 1000, -- maximum re-use of timer context } local object @@ -59,7 +60,8 @@ The OpenResty timer is fairly limited, this timer adds a number of common options as parameters without having to recode (and retest) them in each project. -* recurring timers (supported by OR as well through `ngx.timer.every`) +* recurring timers (supported by OR as well through `ngx.timer.every`, but this + implementation will not run overlapping timers) * immediate first run for recurring timers @@ -77,6 +79,35 @@ project. See the [online LDoc documentation](https://kong.github.io/lua-resty-timer/topics/README.md.html) for the complete API. +## Performance and optimizations + +This timer implementation is based on "sleeping on a timer-context". This means +that a single timer is created, and in between recurring invocations `ngx.sleep` +is called as a delay to the next invocation. This as opposed to creating a new +Nginx timer for each invocation. This is configurable however. + +Creating a new context is a rather expensive operation. Hence we keep the context +alive and just sleep without the need to recreate it. The downside is that there +is the possibility of a memory leak. Since a timer is implemented in OR as a +request and requests are short-lived, some memory is not released until after the +context is destroyed. + +The setting `max_use` controls the timer behaviour. The default value is `1000`, +which means that after each `1000` invocations the timer context is destroyed +and a new one is generated (this happens transparent to the user). + +Optimizing this setting (very opinionated/arbitrary!): + + * if the timer interval is more than `60` seconds, then keeping the context + around in idle state for that period is probably more expensive resource wise + than having to recreate the context. So use `max_use == 1` to drop the + context after each invocation. + + * if the timer interval is less than `5` seconds then reusing the context makes + sense. Assume recycling to be done once per minute, or for very high + frequency timers (and hence higher risk of memory leak), more than once per + minute. + ## History Versioning is strictly based on [Semantic Versioning](https://semver.org/) @@ -95,6 +126,11 @@ Versioning is strictly based on [Semantic Versioning](https://semver.org/) ### unreleased * Feat: provide a stacktrace upon errors in the timer callback + * Feat: add a `max_use` option. This ensures timer-contexts are recycled to + prevent memory leaks. + * Feat: adds a new function `sleep` similar to `ngx.sleep` except that it is + interrupted on worker exit. + * Fix: now accounts for execution time of the handler, when rescheduling. ### 1.1.0 (6-Nov-2020) diff --git a/lib/resty/timer.lua b/lib/resty/timer.lua index 6bd80d7..88c27d5 100644 --- a/lib/resty/timer.lua +++ b/lib/resty/timer.lua @@ -14,7 +14,7 @@ local anchor_registry = {} local gc_registry = setmetatable({},{ __mode = "v" }) local timer_id = 0 local now = ngx.now -local sleep = ngx.sleep +local ngx_sleep = ngx.sleep local exiting = ngx.worker.exiting local KEY_PREFIX = "[lua-resty-timer]" @@ -23,6 +23,64 @@ local CANCEL_GC = "GC" local CANCEL_SYSTEM = "SYSTEM" local CANCEL_USER = "USER" +local sleep do + -- create a 10yr timer. Will only be called when the worker exits with + -- `premature` set. The callback will release a global semaphore to wake up + -- sleeping threads + local sema = assert(require("ngx.semaphore").new()) + assert(timer_at(10*365*24*60*60, function() + sema:post(math.huge) + + -- TODO: remove the sleep(0), it's a hack around semaphores + -- not being released properly, an Openresty bug. + -- See https://github.com/openresty/lua-resty-core/issues/337 + ngx_sleep(0) + end)) + + --- A `sleep` function that exits early on worker exit. The same as `ngx.sleep` + -- except that it will be interrupted when the current worker starts exiting. + -- Calling this function after the worker started exiting will immediately + -- return (after briefly yielding with a `ngx.sleep(0)`). + -- @param delay same as `ngx.sleep()`; delay in seconds. + -- @return `true` if finished, `false` if returned early, or nil+err (under + -- the hood it will return: "`not ngx.worker.exiting()`"). + -- @usage if not sleep(5) then + -- -- sleep was interrupted, exit now + -- return nil, "exiting" + -- end + -- + -- -- do stuff + function sleep(delay) + if type(delay) ~= "number" then + error("Bad argument #1, expected number, got " .. type(delay), 2) + end + + if delay <= 0 then + -- no delay, just yield and return + ngx_sleep(delay) + return not exiting() + end + + local ok, err = sema:wait(delay) + if err == "timeout" then + -- the sleep was finished + return not exiting() + end + + -- each call to sleep should at a minimum at least yield, to prevent + -- dead-locks elsewhere, so forcefully yield here. + ngx_sleep(0) + + if ok then + -- we're exiting early because resources were posted to the semaphore + return not exiting() + end + + ngx.log(ngx.ERR, "waiting for semaphore failed: ", err) + return nil, err + end +end + --- Cancel the timer. @@ -113,19 +171,35 @@ local function handler(premature, id) end end - -- reschedule the timer + -- calculate next interval local interval = self.sub_interval + self.sub_jitter local t = now() next_interval = math.max(0, self.expire + interval - t) self.expire = t + next_interval + + -- do we need to recycle the current timer-context? + local call_count = self.call_count + 1 + if call_count > self.max_use then + -- recreate context + local ok, err = timer_at(next_interval, handler, id) + if ok then + self.call_count = 0 + return -- exit the while loop, and end this timer context + end + -- couldn't create a timer, so the system seems to be under pressure + -- of timer resources, so we log the error and then fallback on the + -- current context and sleeping. Next invocation will again try and + -- replace the timer context. + if err ~= "process exiting" then + ngx.log(ngx.ERR, LOG_PREFIX, "failed to create timer: " .. err) + end + end + self.call_count = call_count self = nil -- luacheck: ignore -- just to make sure we're eligible for GC end -- existing timer recurring, so keep this thread alive and just sleep - if not exiting() then - sleep(next_interval) - end - premature = exiting() + premature = not sleep(next_interval) end -- while end @@ -141,12 +215,12 @@ end -- -- * `jitter` : (optional, number) variable interval to add to the first interval, default 0. -- If set to 1 second then the first interval will be set between `interval` and `interval + 1`. --- This makes sure if large numbers of timers are used, their execution gets randomly +-- This makes sure if a large number of timers are used, their execution gets randomly -- distributed. -- -- * `immediate` : (boolean) will do the first run immediately (the initial -- interval will be set to 0 seconds). This option requires the `recurring` option. --- The first run will not include the `jitter` interval, it will be added to second run. +-- The first run will not include the `jitter` interval, it will be added to the second run. -- -- * `detached` : (boolean) if set to `true` the timer will keep running detached, if -- set to `false` the timer will be garbage collected unless anchored @@ -175,6 +249,9 @@ end -- this option set, the maximum delay will be `interval + sub_interval`. -- This option requires the `immediate` and `key_name` options. -- +-- * `max_use` : (optional, number, default 1000) the maximum use count for a +-- timer context before recreating it. +-- -- @function new -- @param opts table with options -- @param ... arguments to pass to the callbacks `expire` and `cancel`. @@ -226,6 +303,7 @@ local function new(opts, ...) detached = opts.detached, -- should run detached, prevent GC args = pack(...), -- arguments to pass along jitter = opts.jitter, -- maximum variance in each schedule + max_use = opts.max_use, -- max use count before recycling timer context -- callbacks cb_expire = opts.expire, -- the callback function cb_cancel = opts.cancel, -- callback function on cancellation @@ -241,6 +319,7 @@ local function new(opts, ...) premature_reason = nil, -- inicator why we're being cancelled gc_proxy = nil, -- userdata proxy to track GC expire = nil, -- time when timer expires + call_count = 0, -- call_count in current timer context } assert(self.interval, "expected 'interval' to be a number") @@ -279,6 +358,13 @@ local function new(opts, ...) self.jitter = 0 self.sub_jitter = 0 end + if self.max_use then + assert(self.recurring, "'max_use' can only be specified on recurring timers") + assert(type(self.max_use) == "number", "expected 'max_use' to be a number") + assert(self.max_use > 0, "expected 'max_use' to be greater than 0") + else + self.max_use = 1000 + end if self.key_name then assert(type(self.key_name) == "string", "expected 'key_name' to be a string") assert(opts.shm_name, "'shm_name' is required when specifying 'key_name'") @@ -309,6 +395,7 @@ end return setmetatable( { new = new, + sleep = sleep, CANCEL_GC = CANCEL_GC, CANCEL_SYSTEM = CANCEL_SYSTEM, CANCEL_USER = CANCEL_USER, diff --git a/t/00-new.t b/t/00-new.t index 02436f2..6bd1b6f 100644 --- a/t/00-new.t +++ b/t/00-new.t @@ -681,3 +681,69 @@ GET /t --- error_log expected 'jitter' to be greater than or equal to 0 + + + +=== TEST 19: new() max_use must be a number +--- http_config eval: $::HttpConfig +--- config + location = /t { + content_by_lua_block { + local timer = require("resty.timer") + local options = { + interval = 1, + max_use = "hello", + recurring = true, + immediate = false, + detached = false, + expire = function(arg1, arg2, arg3) + ngx.log(ngx.ERR, "EXPIRE ", arg1, arg2, arg3) + end, + } + local ok, err = pcall(timer.new, options, "arg1", nil, "arg3") + if ok then + ngx.say(true) + else + ngx.log(ngx.ERR, err) + end + } + } +--- request +GET /t +--- response_body + +--- error_log +expected 'max_use' to be a number + + + +=== TEST 20: new() max_use must be > 0 +--- http_config eval: $::HttpConfig +--- config + location = /t { + content_by_lua_block { + local timer = require("resty.timer") + local options = { + interval = 1, + max_use = 0, + recurring = true, + immediate = false, + detached = false, + expire = function(arg1, arg2, arg3) + ngx.log(ngx.ERR, "EXPIRE ", arg1, arg2, arg3) + end, + } + local ok, err = pcall(timer.new, options, "arg1", nil, "arg3") + if ok then + ngx.say(true) + else + ngx.log(ngx.ERR, err) + end + } + } +--- request +GET /t +--- response_body + +--- error_log +expected 'max_use' to be greater than 0 diff --git a/t/02-timer-reuse-1.t b/t/02-timer-reuse-1.t new file mode 100644 index 0000000..56bbf4f --- /dev/null +++ b/t/02-timer-reuse-1.t @@ -0,0 +1,176 @@ +use Test::Nginx::Socket::Lua; +use Cwd qw(cwd); + +workers(1); + +plan tests => repeat_each() * (blocks() * 2) + 1; + +my $pwd = cwd(); + +our $HttpConfig = qq{ + lua_package_path "$pwd/lib/?.lua;;"; + lua_shared_dict test_shm 8m; + lua_shared_dict timer_shm 8m; +}; + +run_tests(); + +__DATA__ + +=== TEST 1: new() timer runs periodically +--- http_config eval: $::HttpConfig +--- config + location = /t { + content_by_lua_block { + local timer = require("resty.timer") + local options = { + interval = 0.1, + recurring = true, + immediate = false, + detached = false, + max_reuse = 1, + expire = function(arg1, arg2, arg3) + ngx.log(ngx.ERR, "EXPIRE ", arg1, arg2, arg3) + end, + cancel = function(reason, arg1, arg2, arg3) + ngx.log(ngx.ERR, "CANCEL ", reason, arg1, arg2, arg3) + end, + --shm_name = "timer_shm", + --key_name = "my_key", + } + local t = timer(options, "arg1", nil, "arg3") + ngx.sleep(0.59) -- 5 occurences + t:cancel() + ngx.say(true) + } + } +--- request +GET /t +--- response_body +true +--- grep_error_log eval: qr/EXPIRE arg1nilarg3|CANCEL USERarg1nilarg3/ +--- grep_error_log_out eval +qr/^EXPIRE arg1nilarg3 +EXPIRE arg1nilarg3 +EXPIRE arg1nilarg3 +EXPIRE arg1nilarg3 +EXPIRE arg1nilarg3 +CANCEL USERarg1nilarg3$/ + + + +=== TEST 2: new() only a single timer runs per shm key +--- http_config eval: $::HttpConfig +--- config + location = /t { + content_by_lua_block { + local timer = require("resty.timer") + local count = 0 + local options = { + interval = 0.1, + sub_interval = 0.001, + recurring = true, + immediate = true, + detached = false, + max_reuse = 1, + expire = function(arg1, arg2, arg3) + count = count + 1 + end, + shm_name = "timer_shm", + key_name = "my_key", + } + for x = 1,10 do + -- create 10 timers with same shm key + -- only 1 should run + timer(options, "arg1", nil, "arg3") + end + -- sleep 5 times interval => + "immediate" = 6 executions + ngx.sleep(0.55) -- could be 10 x 6 = 60 occurences + ngx.say(count) + } + } +--- request +GET /t +--- response_body +6 + + + +=== TEST 3: new() timer runs immediately +--- http_config eval: $::HttpConfig +--- config + location = /t { + content_by_lua_block { + local timer = require("resty.timer") + local count = 0 + local options = { + interval = 0.1, + recurring = true, + immediate = true, + detached = false, + max_reuse = 1, + expire = function(arg1, arg2, arg3) + count = count + 1 + end, + --shm_name = "timer_shm", + --key_name = "my_key", + } + local t = timer(options, "arg1", nil, "arg3") + ngx.sleep(0.15) -- could be 1 occurence, +1 for immediate + ngx.say(count) + } + } +--- request +GET /t +--- response_body +2 + + + +=== TEST 4: new() sub_interval is honored +--- http_config eval: $::HttpConfig +--- config + location = /t { + content_by_lua_block { + local timer = require("resty.timer") + local count = 0 + local t = {} + ngx.update_time() + local t0 = ngx.now() + local options = { + interval = 0.1, + recurring = true, + immediate = true, + detached = false, + max_reuse = 1, + expire = function(t_id) + count = count + 1 + ngx.update_time() + --print("========EXEC=======> ", t_id, " @ ", 1000*(ngx.now() - t0)) + if t_id == 1 then + t[t_id]:cancel() -- cancel so it ran only once + end + end, + shm_name = "timer_shm", + key_name = "my_key", + sub_interval = 0.01, + } + for x = 1,2 do + -- create 2 timers with same shm key + -- only 1 should run + t[x] = timer(options, x) + ngx.update_time() + --print("=======SCHED=======> ",x, " @ ", 1000*(ngx.now() - t0)) + -- wait till half way interval before scheduling the second one + ngx.sleep(options.interval / 2) + end + -- first timer ran on start, so count == 1, timer 1 was immediately cancelled + ngx.sleep(options.interval / 2) -- lock set by 1st timer expires, the first half was already done when creating the timers above + ngx.sleep(options.sub_interval * 1.5) -- by now the second timer should have taken over (count == 2) + ngx.say(count) --> 2; first when first timer starts, 2nd by second timer after it picked up + } + } +--- request +GET /t +--- response_body +2 diff --git a/t/02-garbagecollect.t b/t/03-garbagecollect.t similarity index 100% rename from t/02-garbagecollect.t rename to t/03-garbagecollect.t