Bug fix: Fall back to normal restart when concurrent restart is not possible.
zyma98 committed Aug 18, 2024
1 parent e5020e0 commit 834608c
Showing 9 changed files with 149 additions and 39 deletions.
4 changes: 4 additions & 0 deletions Cargo.toml
@@ -205,6 +205,10 @@ path = "examples/tests/task/unwind/deferred_nested_drop.rs"
name = "test-task-unwind-concurrent_restart"
path = "examples/tests/task/unwind/concurrent_restart.rs"

[[example]]
name = "test-task-unwind-failed_concurrent_restart"
path = "examples/tests/task/unwind/failed_concurrent_restart.rs"

# *** Tests for task - segmented stack ***

[[example]]
2 changes: 0 additions & 2 deletions examples/tests/task/segmented_stack/return_values.rs
@@ -1,6 +1,4 @@
//! Test function return values via registers.
//!
//! FIXME: Does return value ever get passed through stack?
#![no_std]
#![no_main]
63 changes: 63 additions & 0 deletions examples/tests/task/unwind/failed_concurrent_restart.rs
@@ -0,0 +1,63 @@
//! Tests that a panicked task should unwind first and reuse its task struct
//! to restart a new instance when the maximum number of tasks allowed by the
//! system configuration has already been reached.
#![no_std]
#![no_main]

extern crate alloc;
use core::sync::atomic::{AtomicBool, Ordering};
use hopter::{boot::main, config, debug::semihosting, hprintln, task};

#[main]
fn main(_: cortex_m::Peripherals) {
// Task #0 is the idle task.
// Task #1 is the main task.

// Task #2 is the test task.
task::build()
.set_entry(will_panic)
.set_priority(config::DEFAULT_TASK_PRIORITY)
.spawn_restartable()
.unwrap();

// Task #3 to #MAX_TASK_NUMBER-1 are dummy tasks.
for _ in 3..config::MAX_TASK_NUMBER {
task::build()
.set_entry(|| {})
.set_priority(config::UNWIND_PRIORITY + 1)
.spawn()
.unwrap();
}

// Now we have reached the maximum task number. The panicked test task will
// not be able to concurrently restart.

// Let the test task run.
task::change_current_priority(config::UNWIND_PRIORITY + 1).unwrap();

semihosting::terminate(true);
}

fn will_panic() {
static FIRST_TIME: AtomicBool = AtomicBool::new(true);
let first_time = FIRST_TIME.fetch_and(false, Ordering::SeqCst);

// Deliberate panic when the task is executed for the first time.
// Unwinding should happen before the second run completes because the
// task cannot restart concurrently.
if first_time {
let _print_on_drop = PrintOnDrop("First run dropped on panic");
panic!()
}

hprintln!("Second run completed");
}

struct PrintOnDrop(&'static str);

impl Drop for PrintOnDrop {
fn drop(&mut self) {
hprintln!("{}", self.0)
}
}
2 changes: 2 additions & 0 deletions examples/tests/task/unwind/failed_concurrent_restart.txt
@@ -0,0 +1,2 @@
First run dropped on panic
Second run completed
2 changes: 1 addition & 1 deletion run_tests.py
@@ -26,7 +26,6 @@ def enumerate_tests():
file_no_ext = os.path.basename(file_rs_ext)[:-3]
file_txt_ext = file_no_ext + '.txt'
file_py_ext = file_no_ext + '.py'
assert(file_txt_ext in answer_files or file_py_ext in answer_files)

if file_txt_ext in answer_files:
tests.append((
@@ -43,6 +42,7 @@ def enumerate_tests():
f'Error: Test file {category}-{subcategory}-{file_rs_ext} does not have an answer file.',
file=sys.stderr
)
sys.exit(1)
return tests

def main():
58 changes: 47 additions & 11 deletions src/schedule/scheduler.rs
@@ -3,7 +3,7 @@ use crate::{
config,
interrupt::svc,
sync::{Access, AllowPendOp, Holdable, Interruptable, RefCellSchedSafe, RunPendedOp, Spin},
task::{Task, TaskBuildError, TaskListAdapter, TaskListInterfaces, TaskState},
task::{Task, TaskListAdapter, TaskListInterfaces, TaskState},
unrecoverable::{self, Lethal},
};
use alloc::sync::Arc;
@@ -259,23 +259,49 @@ impl Scheduler {
STARTED.load(Ordering::SeqCst)
}

/// Check whether the number of existing tasks has already reached the allowed
/// maximum ([`MAX_TASK_NUMBER`](config::MAX_TASK_NUMBER)). If not, return
/// a [`Quota`] which is a token allowing new task creation.
pub(crate) fn request_task_quota() -> Result<Quota, ()> {
loop {
let exist_task_num = EXIST_TASK_NUM.load(Ordering::SeqCst);

// If we can still hold more tasks, temporarily increment the task
// number and return a `Quota`.
if exist_task_num < config::MAX_TASK_NUMBER {
match EXIST_TASK_NUM.compare_exchange_weak(
exist_task_num,
exist_task_num + 1,
Ordering::SeqCst,
Ordering::SeqCst,
) {
Ok(_) => return Ok(Quota(Seal)),
// In the rare case of contention on the counter,
// retry the check.
Err(_) => continue,
}
// Maximum number of tasks reached; return an error.
} else {
return Err(());
}
}
}

/// Insert a new task into the scheduler's ready queue. The task must not be an
/// existing task, but can be a new restarted instance of a panicked task.
///
/// # Parameters
/// - `task`: The task struct of the new task.
/// - `quota`: The quota previously acquired by calling
/// [`request_task_quota`](Self::request_task_quota).
///
/// # Return
/// - `Ok(())` if the new task is ready to be run by the scheduler
/// - `Err(TaskBuildError::NoMoreTask)` if the maximum number of tasks has
/// been reached.
pub(crate) fn accept_new_task(task: Arc<Task>) -> Result<(), TaskBuildError> {
// Return error if reached maximum task number, otherwise increment it.
if EXIST_TASK_NUM.load(Ordering::SeqCst) >= config::MAX_TASK_NUMBER {
return Err(TaskBuildError::NoMoreTask);
}
EXIST_TASK_NUM.fetch_add(1, Ordering::SeqCst);

Self::insert_task_to_ready_queue(task);

Ok(())
pub(crate) fn accept_new_task(task: Task, quota: Quota) {
core::mem::forget(quota);
Self::insert_task_to_ready_queue(Arc::new(task));
}

/// Insert a task back to the scheduler's ready queue. The task should
@@ -388,3 +414,13 @@ impl Holdable for Scheduler {
Scheduler::resume();
}
}

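/// Token granting permission to create one new task. Dropping an unused
/// `Quota` releases the reserved slot in the global task counter.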
pub(crate) struct Quota(Seal);

impl Drop for Quota {
fn drop(&mut self) {
EXIST_TASK_NUM.fetch_sub(1, Ordering::SeqCst);
}
}

struct Seal;
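The `Quota` type above is an RAII reservation token: `request_task_quota` claims a slot in the global task counter via a CAS loop, dropping an unclaimed `Quota` releases the slot, and `accept_new_task` calls `core::mem::forget` so an accepted task keeps its slot until termination. Below is a minimal standalone sketch of the same pattern; all names are illustrative, not hopter's API.

use std::sync::atomic::{AtomicUsize, Ordering};

const MAX_TASKS: usize = 4;
static TASK_COUNT: AtomicUsize = AtomicUsize::new(0);

/// Token proving that a task slot has been reserved.
/// Dropping it releases the slot.
struct Quota(());

fn request_quota() -> Result<Quota, ()> {
    loop {
        let n = TASK_COUNT.load(Ordering::SeqCst);
        if n >= MAX_TASKS {
            return Err(()); // all slots taken
        }
        // CAS so that two racing threads cannot claim the same slot.
        match TASK_COUNT.compare_exchange_weak(n, n + 1, Ordering::SeqCst, Ordering::SeqCst) {
            Ok(_) => return Ok(Quota(())),
            Err(_) => continue, // lost the race; re-check the count
        }
    }
}

impl Drop for Quota {
    fn drop(&mut self) {
        // A reserved but unused slot goes back to the pool.
        TASK_COUNT.fetch_sub(1, Ordering::SeqCst);
    }
}

fn accept_task(quota: Quota) {
    // The slot now belongs to the running task, so the
    // decrement-on-drop must not fire.
    std::mem::forget(quota);
}

fn main() {
    let q = request_quota().unwrap();
    accept_task(q); // slot kept: count stays at 1
    let q2 = request_quota().unwrap();
    drop(q2); // slot released: count falls back to 1
    assert_eq!(TASK_COUNT.load(Ordering::SeqCst), 1);
}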
35 changes: 23 additions & 12 deletions src/task/builder.rs
@@ -1,5 +1,5 @@
use super::Task;
use crate::{config, schedule::scheduler::Scheduler, unrecoverable::Lethal};
use crate::{config, schedule::scheduler::Scheduler};
use alloc::sync::Arc;

/// Enumeration of errors during task creation.
@@ -120,15 +120,20 @@
return Err(TaskBuildError::NoStack);
}

// Get a quota from the scheduler to ensure that the maximum number of
// tasks has not been reached yet.
let quota = Scheduler::request_task_quota().map_err(|_| TaskBuildError::NoMoreTask)?;

let new_task = Task::build(
id,
entry_closure,
self.init_stklet_size,
self.is_dynamic_stack,
prio,
)
.unwrap_or_die();
Scheduler::accept_new_task(Arc::new(new_task))
)?;
Scheduler::accept_new_task(new_task, quota);

Ok(())
}
}

@@ -149,25 +154,31 @@
return Err(TaskBuildError::NoStack);
}

// Get a quota from the scheduler to ensure that the maximum number of
// tasks has not been reached yet.
let quota = Scheduler::request_task_quota().map_err(|_| TaskBuildError::NoMoreTask)?;

let new_task = Task::build_restartable(
id,
entry_closure,
self.init_stklet_size,
self.is_dynamic_stack,
prio,
)
.unwrap_or_die();
Scheduler::accept_new_task(Arc::new(new_task))
)?;
Scheduler::accept_new_task(new_task, quota);

Ok(())
}
}

/// Start a new task from a previously failed task.
#[cfg(feature = "unwind")]
pub(crate) fn spawn_restarted_from_task(prev_task: Arc<Task>) -> Result<(), ()> {
let new_task = Task::build_restarted(prev_task);

// FIXME: should check for available task slot in advance but not here.
Scheduler::accept_new_task(Arc::new(new_task)).unwrap_or_die();
pub(crate) fn try_spawn_restarted(prev_task: Arc<Task>) -> Result<(), ()> {
// Get a quota from the scheduler to ensure that the maximum number of
// tasks has not been reached yet.
let quota = Scheduler::request_task_quota()?;

let restarted_task = Task::build_restarted(prev_task);
Scheduler::accept_new_task(restarted_task, quota);
Ok(())
}
2 changes: 0 additions & 2 deletions src/task/task_struct.rs
@@ -590,8 +590,6 @@ impl Task {
// task.
self.restarted_from = Some(Arc::downgrade(&prev_task));

// FIXME: what if this newly built task struct fail to be inserted into
// the ready queue?
// Record that the panicked task has been restarted. This will prevent
// other restart attempt.
prev_task.has_restarted.store(true, Ordering::SeqCst);
20 changes: 9 additions & 11 deletions src/unwind/unwind.rs
@@ -392,26 +392,24 @@ impl<'a> Debug for UnwindState<'a> {

#[inline(never)]
fn try_concurrent_restart() {
let res = current::with_current_task_arc(|cur_task| {
current::with_current_task_arc(|cur_task| {
// We will limit the concurrent restart rate to at most one concurrent
// instance. If this task is a restarted instance, and also if the original
// instance has not finished unwinding, i.e. the task struct reference
// count is positive, we will not do concurrent restart.
if let Some(restarted_from) = cur_task.get_restart_origin_task() {
if restarted_from.strong_count() != 0 {
return Ok(());
return;
}
}

// Otherwise, we concurrently restart the task.
task::spawn_restarted_from_task(cur_task)
});

// Concurrent restart failed.
// FIXME: in this case, we should try normal restart instead of giving up.
if res.is_err() {
cortex_m::interrupt::free(|_| loop {});
}
// Otherwise, we try to concurrently restart the panicked task. It is
// fine if we are not able to start a new instance running concurrently,
// probably because the maximum number of tasks has been reached. In
// this case the restarted instance will reuse the current task struct
// and will run only after the unwinding finishes.
let _ = task::try_spawn_restarted(cur_task);
})
}

impl UnwindState<'static> {
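Taken together, the fix replaces the old behavior of halting forever when a concurrent restart fails (the `cortex_m::interrupt::free(|_| loop {})` removed above): if `try_spawn_restarted` cannot obtain a quota, the panicked task simply finishes unwinding and is then restarted in place, reusing its own task struct. Below is a simplified standalone model of that decision; the names are illustrative only, since the real logic spans `try_concurrent_restart` and the scheduler.

/// Which path a panicked task takes to restart.
#[derive(Debug, PartialEq)]
enum RestartPath {
    /// A fresh instance is spawned; it runs while the old one unwinds.
    Concurrent,
    /// No task slot is free (or the restart is rate-limited): unwind
    /// first, then restart by reusing the existing task struct.
    InPlaceAfterUnwind,
}

fn choose_restart_path(quota_available: bool, origin_still_unwinding: bool) -> RestartPath {
    // Rate limit: at most one concurrent instance per original task.
    if origin_still_unwinding {
        return RestartPath::InPlaceAfterUnwind;
    }
    if quota_available {
        RestartPath::Concurrent
    } else {
        // The fallback introduced by this commit.
        RestartPath::InPlaceAfterUnwind
    }
}

fn main() {
    assert_eq!(choose_restart_path(true, false), RestartPath::Concurrent);
    // Task table full: fall back instead of spinning forever.
    assert_eq!(choose_restart_path(false, false), RestartPath::InPlaceAfterUnwind);
}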
