ostd/task/scheduler/
mod.rs

1// SPDX-License-Identifier: MPL-2.0
2//! Task scheduling.
3//!
4//! # Scheduler Injection
5//!
6//! The task scheduler of an OS is a complex beast,
7//! and the most suitable scheduling algorithm often depends on the target usage scenario.
8//! To avoid code bloat and offer flexibility,
9//! OSTD does not include a gigantic, one-size-fits-all task scheduler.
10//! Instead, it allows the client to implement a custom scheduler (in safe Rust, of course)
11//! and register it with OSTD.
12//! This feature is known as **scheduler injection**.
13//!
14//! The client kernel performs scheduler injection via the [`inject_scheduler`] API.
15//! This API should be called as early as possible during kernel initialization,
16//! before any [`Task`]-related APIs are used.
17//! This requirement is reasonable since `Task`s depend on the scheduler.
18//!
19//! # Scheduler Abstraction
20//!
21//! The `inject_scheduler` API accepts an object implementing the [`Scheduler`] trait,
22//! which abstracts over any SMP-aware task scheduler.
23//! Whenever an OSTD client spawns a new task (via [`crate::task::TaskOptions`])
24//! or wakes a sleeping task (e.g., via [`crate::sync::Waker`]),
25//! OSTD internally forwards the corresponding `Arc<Task>`
26//! to the scheduler by invoking the [`Scheduler::enqueue`] method.
27//! This allows the injected scheduler to manage all runnable tasks.
28//!
29//! Each enqueued task is dispatched to one of the per-CPU local runqueues,
30//! which manage all runnable tasks on a specific CPU.
31//! A local runqueue is abstracted by the [`LocalRunQueue`] trait.
32//! OSTD accesses the local runqueue of the current CPU
33//! via [`Scheduler::local_rq_with`] or [`Scheduler::mut_local_rq_with`],
34//! which return immutable and mutable references to `dyn LocalRunQueue`, respectively.
35//!
36//! The [`LocalRunQueue`] trait enables OSTD to inspect and manipulate local runqueues.
37//! For instance, OSTD invokes the [`LocalRunQueue::pick_next`] method
38//! to let the scheduler select the next task to run.
39//! OSTD then performs a context switch to that task,
40//! which becomes the _current_ running task, accessible via [`LocalRunQueue::current`].
41//! When the current task is about to sleep (e.g., via [`crate::sync::Waiter`]),
42//! OSTD removes it from the local runqueue using [`LocalRunQueue::dequeue_current`].
43//!
44//! The interfaces of `Scheduler` and `LocalRunQueue` are simple
45//! yet (perhaps surprisingly) powerful enough to support
46//! even complex and advanced task scheduler implementations.
47//! Scheduler implementations are free to employ any load-balancing strategy
48//! to dispatch enqueued tasks across local runqueues,
49//! and each local runqueue is free to choose any prioritization strategy
50//! for selecting the next task to run.
51//! Based on OSTD's scheduling abstractions,
52//! the Asterinas kernel has successfully supported multiple Linux scheduling classes,
53//! including both real-time and normal policies.
54//!
55//! # Safety Impact
56//!
57//! While OSTD delegates scheduling decisions to the injected task scheduler,
58//! it verifies these decisions to avoid undefined behavior.
59//! In particular, it enforces the following safety invariant:
60//!
61//! > A task must not be scheduled to run on more than one CPU at a time.
62//!
63//! Violating this invariant—e.g., running the same task on two CPUs concurrently—
64//! can have catastrophic consequences,
65//! as the task's stack and internal state may be corrupted by concurrent modifications.
66// mod fifo_scheduler;
67// pub mod info;
68
69use alloc::sync::Arc;
70use spin::Once;
71
72// use super::{preempt::cpu_local, processor, Task};
73use super::Task;
74// use crate::{
75//     cpu::{CpuId, CpuSet, PinCurrentCpu},
76//     prelude::*,
77//     task::disable_preempt,
78//     timer,
79// };
80use crate::specs::mm::cpu::CpuId;
81
82/// Injects a custom implementation of task scheduler into OSTD.
83///
84/// This function can only be called once and must be called during the initialization phase of kernel,
85/// before any [`Task`]-related APIs are invoked.
86pub fn inject_scheduler(scheduler: &'static dyn Scheduler<Task>) {
87    /* SCHEDULER.call_once(|| scheduler);
88
89    timer::register_callback(|| {
90        SCHEDULER.get().unwrap().mut_local_rq_with(&mut |local_rq| {
91            let should_pick_next = local_rq.update_current(UpdateFlags::Tick);
92            if should_pick_next {
93                cpu_local::set_need_preempt();
94            }
95        })
96    }); */
97}
98
/// The task scheduler injected via [`inject_scheduler`], set at most once.
static SCHEDULER: Once<&'static dyn Scheduler<Task>> = Once::new();
100
/// A SMP-aware task scheduler.
pub trait Scheduler<T = Task>: Sync + Send {
    /// Enqueues a runnable task.
    ///
    /// The scheduler implementer can perform load-balancing or some time accounting work here.
    ///
    /// The newly-enqueued task may have a higher priority than the currently running one on a CPU
    /// and thus should preempt the latter.
    /// In this case, this method returns the ID of that CPU.
    fn enqueue(&self, runnable: Arc<T>, flags: EnqueueFlags) -> Option<CpuId>;

    /// Gets an immutable access to the local runqueue of the current CPU.
    ///
    /// The runqueue is exposed to the caller only through the provided closure `f`.
    fn local_rq_with(&self, f: &mut dyn FnMut(&dyn LocalRunQueue<T>));

    /// Gets a mutable access to the local runqueue of the current CPU.
    ///
    /// The runqueue is exposed to the caller only through the provided closure `f`.
    fn mut_local_rq_with(&self, f: &mut dyn FnMut(&mut dyn LocalRunQueue<T>));
}
118
/// A per-CPU, local runqueue.
///
/// This abstraction allows OSTD to inspect and manipulate local runqueues.
///
/// Conceptually, a local runqueue maintains:
/// 1. A priority queue of runnable tasks.
///    The definition of "priority" is left to the concrete implementation.
/// 2. The current running task.
///
/// # Interactions with OSTD
///
/// ## Overview
///
/// It is crucial for implementers of `LocalRunQueue`
/// to understand how OSTD interacts with local runqueues.
///
/// A local runqueue is consulted by OSTD in response to one of four scheduling events:
/// - **Yielding**, triggered by [`Task::yield_now`], where the current task voluntarily gives up CPU time.
/// - **Sleeping**, triggered by [`crate::sync::Waiter::wait`]
///   or any synchronization primitive built upon it (e.g., [`crate::sync::WaitQueue`], [`crate::sync::Mutex`]),
///   which blocks the current task until a wake-up event occurs.
/// - **Ticking**, triggered periodically by the system timer
///   (see [`crate::arch::timer::TIMER_FREQ`]),
///   which provides an opportunity to do time accounting and consider preemption.
/// - **Exiting**, triggered when the execution logic of a task has come to an end,
///   which informs the scheduler that the task is exiting and will never be enqueued again.
///
/// The general workflow for OSTD to handle a scheduling event is as follows:
/// 1. Acquire exclusive access to the local runqueue using [`Scheduler::mut_local_rq_with`].
/// 2. Call [`LocalRunQueue::update_current`] to update the current task's state,
///    returning a boolean value that indicates
///    whether the current task should and can be replaced with another runnable task.
/// 3. If the task is about to sleep or exit, call [`LocalRunQueue::dequeue_current`]
///    to remove it from the runqueue.
/// 4. If the return value of `update_current` in Step 2 is true,
///    then select the next task to run with [`LocalRunQueue::pick_next`].
///
/// ## When to Pick the Next Task?
///
/// As shown above,
/// OSTD guarantees that `pick_next` is only called
/// when the current task should and can be replaced.
/// This avoids unnecessary invocations and improves efficiency.
///
/// But under what conditions should the current task be replaced?
/// Two criteria must be met:
/// 1. There exists at least one other runnable task in the runqueue.
/// 2. That task should preempt the current one, if present.
///
/// Some implications of these rules:
/// - If the runqueue is empty, `update_current` must return `false`—there's nothing to run.
/// - If the runqueue is non-empty but the current task is absent,
///   `update_current` should return `true`—anything is better than nothing.
/// - If the runqueue is non-empty and the flag is `UpdateFlags::Wait`,
///   `update_current` should also return `true`,
///   because the current task is about to block.
/// - In other cases, the return value depends on the scheduler's prioritization policy.
///   For instance, a real-time task may only be preempted by a higher-priority task
///   or if it explicitly yields.
///   A normal task under Linux's CFS may be preempted by a task with smaller vruntime,
///   but never by the idle task.
///
/// When OSTD is unsure about whether the current task should or can be replaced,
/// it will invoke [`LocalRunQueue::try_pick_next`], the fallible version of `pick_next`.
///
/// ## Internal Working
///
/// To guide scheduler implementers,
/// we provide a simplified view of how OSTD interacts with local runqueues _internally_
/// in order to handle the four scheduling events.
///
/// ### Yielding
///
/// ```ignore
/// # use ostd::prelude::*;
/// # use ostd::task::{*, scheduler::*};
/// #
/// # fn switch_to(next: Arc<Task>) {}
/// #
/// /// Yields the current task.
/// fn yield_now(scheduler: &'static dyn Scheduler) {
///     let next_task_opt: Option<Arc<Task>> = scheduler.mut_local_rq_with(|local_rq| {
///         let should_pick_next = local_rq.update_current(UpdateFlags::Yield);
///         should_pick_next.then(|| local_rq.pick_next().clone())
///     });
///     if let Some(next_task) = next_task_opt {
///         switch_to(next_task);
///     }
/// }
/// ```
///
/// ### Sleeping
///
/// ```ignore
/// # use ostd::prelude::*;
/// # use ostd::task::{*, scheduler::*};
/// #
/// # fn switch_to(next: Arc<Task>) {}
/// #
/// /// Puts the current task to sleep.
/// ///
/// /// The function takes a closure to check if the task is woken.
/// /// This function is used internally to guard against race conditions,
/// /// where the task is woken just before it goes to sleep.
/// fn sleep<F: Fn() -> bool>(scheduler: &'static dyn Scheduler, is_woken: F) {
///     let mut next_task_opt: Option<Arc<Task>> = None;
///     let mut is_first_try = true;
///     while scheduler.mut_local_rq_with(|local_rq| {
///         if is_first_try {
///             if is_woken() {
///                 return false; // exit loop
///             }
///             is_first_try = false;
///
///             let should_pick_next = local_rq.update_current(UpdateFlags::Wait);
///             let _current = local_rq.dequeue_current();
///             if !should_pick_next {
///                 return true; // continue loop
///             }
///             next_task_opt = Some(local_rq.pick_next().clone());
///             false // exit loop
///         } else {
///             next_task_opt = local_rq.try_pick_next().cloned();
///             next_task_opt.is_none()
///         }
///     }) {}
///     if let Some(next_task) = next_task_opt {
///         switch_to(next_task);
///     }
/// }
/// ```
///
/// ### Ticking
///
/// ```ignore
/// # use ostd::prelude::*;
/// # use ostd::task::{*, scheduler::*};
/// #
/// # fn switch_to(next: Arc<Task>) {}
/// # mod cpu_local {
/// #     pub fn set_need_preempt() {}
/// #     pub fn should_preempt() -> bool { false }
/// # }
/// #
/// /// A callback to be invoked periodically by the timer interrupt.
/// fn on_tick(scheduler: &'static dyn Scheduler) {
///     scheduler.mut_local_rq_with(|local_rq| {
///         let should_pick_next = local_rq.update_current(UpdateFlags::Tick);
///         if should_pick_next {
///             cpu_local::set_need_preempt();
///         }
///     });
/// }
///
/// /// A preemption point, called at an earliest convenient timing
/// /// when OSTD can safely preempt the current running task.
/// fn might_preempt(scheduler: &'static dyn Scheduler) {
///     if !cpu_local::should_preempt() {
///         return;
///     }
///     let next_task_opt: Option<Arc<Task>> = scheduler
///         .mut_local_rq_with(|local_rq| local_rq.try_pick_next().cloned());
///     if let Some(next_task) = next_task_opt {
///         switch_to(next_task);
///     }
/// }
/// ```
///
/// ### Exiting
///
/// ```ignore
/// # use ostd::prelude::*;
/// # use ostd::task::{*, scheduler::*};
/// #
/// # fn switch_to(next: Arc<Task>) {}
/// #
/// /// Exits the current task.
/// fn exit(scheduler: &'static dyn Scheduler) {
///     let mut next_task_opt: Option<Arc<Task>> = None;
///     let mut is_first_try = true;
///     while scheduler.mut_local_rq_with(|local_rq| {
///         if is_first_try {
///             is_first_try = false;
///             let should_pick_next = local_rq.update_current(UpdateFlags::Exit);
///             let _current = local_rq.dequeue_current();
///             if !should_pick_next {
///                 return true; // continue loop
///             }
///             next_task_opt = Some(local_rq.pick_next().clone());
///             false // exit loop
///         } else {
///             next_task_opt = local_rq.try_pick_next().cloned();
///             next_task_opt.is_none()
///         }
///     }) {}
///     let next_task = next_task_opt.unwrap();
///     switch_to(next_task);
/// }
/// ```
pub trait LocalRunQueue<T = Task> {
    /// Gets the current runnable task.
    fn current(&self) -> Option<&Arc<T>>;

    /// Updates the current runnable task's scheduling statistics and
    /// potentially its position in the runqueue.
    ///
    /// The return value of this method indicates whether an invocation of `pick_next` should be followed
    /// to find another task to replace the current one.
    #[must_use]
    fn update_current(&mut self, flags: UpdateFlags) -> bool;

    /// Picks the next runnable task.
    ///
    /// This method instructs the local runqueue to pick the next runnable task and replace the current one.
    /// A reference to the new "current" task will be returned by this method.
    /// If the "old" current task is present, then it is still runnable and thus remains in the runqueue.
    ///
    /// # Panics
    ///
    /// As explained in the type-level Rust doc,
    /// this method will only be invoked by OSTD after a call to `update_current` returns true.
    /// In case that this contract is broken by the caller,
    /// the implementer is free to exhibit any undesirable or incorrect behaviors, including panicking.
    fn pick_next(&mut self) -> &Arc<T> {
        self.try_pick_next().unwrap()
    }

    /// Tries to pick the next runnable task.
    ///
    /// This method instructs the local runqueue to pick the next runnable task on a best-effort basis.
    /// If such a task can be picked, then this task supersedes the current task and
    /// the method returns a reference to the new "current" task.
    /// If the "old" current task is present, then it is still runnable and thus remains in the runqueue.
    fn try_pick_next(&mut self) -> Option<&Arc<T>>;

    /// Removes the current runnable task from runqueue.
    ///
    /// This method returns the current runnable task.
    /// If there is no current runnable task, this method returns `None`.
    fn dequeue_current(&mut self) -> Option<Arc<T>>;
}
360
/// Possible triggers of an `enqueue` action.
// `Debug` is derived so the flag can appear in diagnostics; `Eq` is derived
// because the `PartialEq` relation on a fieldless enum is total.
#[derive(Debug, PartialEq, Eq, Copy, Clone)]
pub enum EnqueueFlags {
    /// Spawn a new task.
    Spawn,
    /// Wake a sleeping task.
    Wake,
}
369
/// Possible triggers of an `update_current` action.
// `Debug` is derived so the flag can appear in diagnostics; `Eq` is derived
// because the `PartialEq` relation on a fieldless enum is total.
#[derive(Debug, PartialEq, Eq, Copy, Clone)]
pub enum UpdateFlags {
    /// Timer interrupt.
    Tick,
    /// Task waiting.
    Wait,
    /// Task yielding.
    Yield,
    /// Task exiting.
    Exit,
}
382
/// Preempts the current task.
///
/// NOTE(review): the body is currently commented out, so this function is a
/// no-op until the logic below is restored.
#[track_caller]
pub(crate) fn might_preempt() {
    /*
    if !cpu_local::should_preempt() {
        return;
    }
    reschedule(|local_rq| {
        let next_task_opt = local_rq.try_pick_next();
        if let Some(next_task) = next_task_opt {
            ReschedAction::SwitchTo(next_task.clone())
        } else {
            ReschedAction::DoNothing
        }
    })
    */
}
400
/// Blocks the current task unless `has_unparked()` returns `true`.
///
/// Note that this method may return due to spurious wake events. It's the caller's responsibility
/// to detect them (if necessary).
#[track_caller]
pub(crate) fn park_current<F>(has_unparked: F)
where
    F: Fn() -> bool,
{
    // The task dequeued on the first pass; used later to detect the case where
    // this task was woken and immediately re-picked as the next task to run.
    let mut current = None;
    let mut is_first_try = true;

    reschedule(|local_rq: &mut dyn LocalRunQueue| {
        let next_task_opt = if is_first_try {
            // Check for a pending wakeup before committing to sleep.
            if has_unparked() {
                return ReschedAction::DoNothing;
            }
            is_first_try = false;

            // Note the race conditions: the current task may be woken after the above `has_unparked`
            // check but before the below `dequeue_current` action; we need to make sure that the
            // wakeup event isn't lost.
            //
            // Currently, for the FIFO and CFS scheduler, `Scheduler::enqueue` will try to lock `local_rq`
            // when the above race condition occurs, so it will wait until we finish calling the
            // `dequeue_current` method and nothing bad will happen. This may need to be revisited
            // after more complex schedulers are introduced.

            let should_pick_next = local_rq.update_current(UpdateFlags::Wait);
            current = local_rq.dequeue_current();
            should_pick_next.then(|| local_rq.pick_next())
        } else {
            // Retry path: keep asking for a runnable task until one shows up.
            local_rq.try_pick_next()
        };

        if let Some(next_task) = next_task_opt {
            // NOTE(review): the `unwrap` below assumes `dequeue_current` returned
            // `Some` on the first pass (i.e., the running task is always present in
            // its runqueue); confirm this invariant holds for all schedulers.
            if Arc::ptr_eq(current.as_ref().unwrap(), next_task) {
                // The current task has been woken and picked as the next runnable task.
                return ReschedAction::DoNothing;
            }
            return ReschedAction::SwitchTo(next_task.clone());
        }

        // No task to switch to yet; ask `reschedule` to invoke this closure again.
        ReschedAction::Retry
    });
}
447
448/// Unblocks a target task.
449pub(crate) fn unpark_target(runnable: Arc<Task>) {
450    let preempt_cpu = SCHEDULER
451        .get()
452        .unwrap()
453        .enqueue(runnable, EnqueueFlags::Wake);
454    if let Some(preempt_cpu_id) = preempt_cpu {
455        // set_need_preempt(preempt_cpu_id);
456    }
457}
458
// Enqueues a newly built task.
//
// Note that the new task is not guaranteed to run at once.
462/*
463#[track_caller]
464pub(super) fn run_new_task(runnable: Arc<Task>) {
465    // FIXME: remove this check for `SCHEDULER`.
466    // Currently OSTD cannot know whether its user has injected a scheduler.
467    if !SCHEDULER.is_completed() {
468        fifo_scheduler::init();
469    }
470
471    let preempt_cpu = SCHEDULER
472        .get()
473        .unwrap()
474        .enqueue(runnable, EnqueueFlags::Spawn);
475    if let Some(preempt_cpu_id) = preempt_cpu {
476        set_need_preempt(preempt_cpu_id);
477    }
478
479    might_preempt();
480}
481*/
482
483/*
484fn set_need_preempt(cpu_id: CpuId) {
485    let preempt_guard = disable_preempt();
486
487    if preempt_guard.current_cpu() == cpu_id {
488        cpu_local::set_need_preempt();
489    } else {
490        crate::smp::inter_processor_call(&CpuSet::from(cpu_id), || {
491            cpu_local::set_need_preempt();
492        });
493    }
494}
495*/
496
// Dequeues the current task from its runqueue.
//
// This should only be called if the current task is to exit.
500/*
501#[track_caller]
502pub(super) fn exit_current() -> ! {
503    let mut is_first_try = true;
504
505    reschedule(|local_rq: &mut dyn LocalRunQueue| {
506        let next_task_opt = if is_first_try {
507            is_first_try = false;
508            let should_pick_next = local_rq.update_current(UpdateFlags::Exit);
509            let _current = local_rq.dequeue_current();
510            should_pick_next.then(|| local_rq.pick_next())
511        } else {
512            local_rq.try_pick_next()
513        };
514
515        if let Some(next_task) = next_task_opt {
516            ReschedAction::SwitchTo(next_task.clone())
517        } else {
518            ReschedAction::Retry
519        }
520    });
521
522    unreachable!()
523}
524*/
525
// Yields execution.
527/*
528#[track_caller]
529pub(super) fn yield_now() {
530    reschedule(|local_rq| {
531        let should_pick_next = local_rq.update_current(UpdateFlags::Yield);
532        let next_task_opt = should_pick_next.then(|| local_rq.pick_next());
533        if let Some(next_task) = next_task_opt {
534            ReschedAction::SwitchTo(next_task.clone())
535        } else {
536            ReschedAction::DoNothing
537        }
538    })
539}
540*/
541
542/// Do rescheduling by acting on the scheduling decision (`ReschedAction`) made by a
543/// user-given closure.
544///
545/// The closure makes the scheduling decision by taking the local runqueue has its input.
546#[track_caller]
547fn reschedule<F>(mut f: F)
548where
549    F: FnMut(&mut dyn LocalRunQueue) -> ReschedAction,
550{
551    // Even if the decision below is `DoNothing`, we should clear this flag. Meanwhile, to avoid
552    // race conditions, we should do this before making the decision.
553    // cpu_local::clear_need_preempt();
554
555    let next_task = loop {
556        let mut action = ReschedAction::DoNothing;
557        SCHEDULER.get().unwrap().mut_local_rq_with(&mut |rq| {
558            action = f(rq);
559        });
560
561        match action {
562            ReschedAction::DoNothing => {
563                return;
564            }
565            ReschedAction::Retry => {
566                continue;
567            }
568            ReschedAction::SwitchTo(next_task) => {
569                break next_task;
570            }
571        };
572    };
573
574    // `switch_to_task` will spin if it finds that the next task is still running on some CPU core,
575    // which guarantees soundness regardless of the scheduler implementation.
576    //
577    // FIXME: The scheduler decision and context switching are not atomic, which can lead to some
578    // strange behavior even if the scheduler is implemented correctly. See "Problem 2" at
579    // <https://github.com/asterinas/asterinas/issues/1633> for details.
580    // processor::switch_to_task(next_task);
581}
582
/// Possible actions of a rescheduling.
enum ReschedAction {
    /// Keep running the current task and do nothing.
    DoNothing,
    /// Loop until finding a task to swap out the current one.
    Retry,
    /// Switch to the target task.
    SwitchTo(Arc<Task>),
}