Skip to main content

ostd/task/scheduler/
mod.rs

1// SPDX-License-Identifier: MPL-2.0
2//! Task scheduling.
3//!
4//! # Scheduler Injection
5//!
6//! The task scheduler of an OS is a complex beast,
7//! and the most suitable scheduling algorithm often depends on the target usage scenario.
8//! To avoid code bloat and offer flexibility,
9//! OSTD does not include a gigantic, one-size-fits-all task scheduler.
10//! Instead, it allows the client to implement a custom scheduler (in safe Rust, of course)
11//! and register it with OSTD.
12//! This feature is known as **scheduler injection**.
13//!
14//! The client kernel performs scheduler injection via the [`inject_scheduler`] API.
15//! This API should be called as early as possible during kernel initialization,
16//! before any [`Task`]-related APIs are used.
17//! This requirement is reasonable since `Task`s depend on the scheduler.
18//!
19//! # Scheduler Abstraction
20//!
21//! The `inject_scheduler` API accepts an object implementing the [`Scheduler`] trait,
22//! which abstracts over any SMP-aware task scheduler.
23//! Whenever an OSTD client spawns a new task (via [`crate::task::TaskOptions`])
24//! or wakes a sleeping task (e.g., via [`crate::sync::Waker`]),
25//! OSTD internally forwards the corresponding `Arc<Task>`
26//! to the scheduler by invoking the [`Scheduler::enqueue`] method.
27//! This allows the injected scheduler to manage all runnable tasks.
28//!
29//! Each enqueued task is dispatched to one of the per-CPU local runqueues,
30//! which manage all runnable tasks on a specific CPU.
31//! A local runqueue is abstracted by the [`LocalRunQueue`] trait.
32//! OSTD accesses the local runqueue of the current CPU
33//! via [`Scheduler::local_rq_with`] or [`Scheduler::mut_local_rq_with`],
34//! which return immutable and mutable references to `dyn LocalRunQueue`, respectively.
35//!
36//! The [`LocalRunQueue`] trait enables OSTD to inspect and manipulate local runqueues.
37//! For instance, OSTD invokes the [`LocalRunQueue::pick_next`] method
38//! to let the scheduler select the next task to run.
39//! OSTD then performs a context switch to that task,
40//! which becomes the _current_ running task, accessible via [`LocalRunQueue::current`].
41//! When the current task is about to sleep (e.g., via [`crate::sync::Waiter`]),
42//! OSTD removes it from the local runqueue using [`LocalRunQueue::dequeue_current`].
43//!
44//! The interfaces of `Scheduler` and `LocalRunQueue` are simple
45//! yet (perhaps surprisingly) powerful enough to support
46//! even complex and advanced task scheduler implementations.
47//! Scheduler implementations are free to employ any load-balancing strategy
48//! to dispatch enqueued tasks across local runqueues,
49//! and each local runqueue is free to choose any prioritization strategy
50//! for selecting the next task to run.
51//! Based on OSTD's scheduling abstractions,
52//! the Asterinas kernel has successfully supported multiple Linux scheduling classes,
53//! including both real-time and normal policies.
54//!
55//! # Safety Impact
56//!
57//! While OSTD delegates scheduling decisions to the injected task scheduler,
58//! it verifies these decisions to avoid undefined behavior.
59//! In particular, it enforces the following safety invariant:
60//!
61//! > A task must not be scheduled to run on more than one CPU at a time.
62//!
63//! Violating this invariant—e.g., running the same task on two CPUs concurrently—
64//! can have catastrophic consequences,
65//! as the task's stack and internal state may be corrupted by concurrent modifications.
66// mod fifo_scheduler;
67// pub mod info;
68use alloc::sync::Arc;
69use spin::Once;
70
71// use super::{preempt::cpu_local, processor, Task};
72use super::Task;
73// use crate::{
74//     cpu::{CpuId, CpuSet, PinCurrentCpu},
75//     prelude::*,
76//     task::disable_preempt,
77//     timer,
78// };
79use crate::specs::mm::cpu::CpuId;
80
81/// Injects a custom implementation of task scheduler into OSTD.
82///
83/// This function can only be called once and must be called during the initialization phase of kernel,
84/// before any [`Task`]-related APIs are invoked.
85pub fn inject_scheduler(scheduler: &'static dyn Scheduler<Task>) {
86    /* SCHEDULER.call_once(|| scheduler);
87
88    timer::register_callback(|| {
89        SCHEDULER.get().unwrap().mut_local_rq_with(&mut |local_rq| {
90            let should_pick_next = local_rq.update_current(UpdateFlags::Tick);
91            if should_pick_next {
92                cpu_local::set_need_preempt();
93            }
94        })
95    }); */
96}
97
98static SCHEDULER: Once<&'static dyn Scheduler<Task>> = Once::new();
99
100/// A SMP-aware task scheduler.
101pub trait Scheduler<T = Task>: Sync + Send {
102    /// Enqueues a runnable task.
103    ///
104    /// The scheduler implementer can perform load-balancing or some time accounting work here.
105    ///
106    /// The newly-enqueued task may have a higher priority than the currently running one on a CPU
107    /// and thus should preempt the latter.
108    /// In this case, this method returns the ID of that CPU.
109    fn enqueue(&self, runnable: Arc<T>, flags: EnqueueFlags) -> Option<CpuId>;
110
111    /// Gets an immutable access to the local runqueue of the current CPU.
112    fn local_rq_with(&self, f: &mut dyn FnMut(&dyn LocalRunQueue<T>));
113
114    /// Gets a mutable access to the local runqueue of the current CPU.
115    fn mut_local_rq_with(&self, f: &mut dyn FnMut(&mut dyn LocalRunQueue<T>));
116}
117
118/// A per-CPU, local runqueue.
119///
120/// This abstraction allows OSTD to inspect and manipulate local runqueues.
121///
122/// Conceptually, a local runqueue maintains:
123/// 1. A priority queue of runnable tasks.
124///    The definition of "priority" is left to the concrete implementation.
125/// 2. The current running task.
126///
127/// # Interactions with OSTD
128///
129/// ## Overview
130///
131/// It is crucial for implementers of `LocalRunQueue`
132/// to understand how OSTD interacts with local runqueues.
133///
134/// A local runqueue is consulted by OSTD in response to one of four scheduling events:
135/// - **Yielding**, triggered by [`Task::yield_now`], where the current task voluntarily gives up CPU time.
136/// - **Sleeping**, triggered by [`crate::sync::Waiter::wait`]
137///   or any synchronization primitive built upon it (e.g., [`crate::sync::WaitQueue`], [`crate::sync::Mutex`]),
138///   which blocks the current task until a wake-up event occurs.
139/// - **Ticking**, triggered periodically by the system timer
140///   (see [`crate::arch::timer::TIMER_FREQ`]),
141///   which provides an opportunity to do time accounting and consider preemption.
142/// - **Exiting**, triggered when the execution logic of a task has come to an end,
143///   which informs the scheduler that the task is exiting and will never be enqueued again.
144///
145/// The general workflow for OSTD to handle a scheduling event is as follows:
146/// 1. Acquire exclusive access to the local runqueue using [`Scheduler::mut_local_rq_with`].
147/// 2. Call [`LocalRunQueue::update_current`] to update the current task's state,
148///    returning a boolean value that indicates
149///    whether the current task should and can be replaced with another runnable task.
150/// 3. If the task is about to sleep or exit, call [`LocalRunQueue::dequeue_current`]
151///    to remove it from the runqueue.
152/// 4. If the return value of `update_current` in Step 2 is true,
153///    then select the next task to run with [`LocalRunQueue::pick_next`].
154///
155/// ## When to Pick the Next Task?
156///
157/// As shown above,
158/// OSTD guarantees that `pick_next` is only called
159/// when the current task should and can be replaced.
160/// This avoids unnecessary invocations and improves efficiency.
161///
162/// But under what conditions should the current task be replaced?
163/// Two criteria must be met:
164/// 1. There exists at least one other runnable task in the runqueue.
165/// 2. That task should preempt the current one, if present.
166///
167/// Some implications of these rules:
168/// - If the runqueue is empty, `update_current` must return `false`—there's nothing to run.
169/// - If the runqueue is non-empty but the current task is absent,
170///   `update_current` should return `true`—anything is better than nothing.
171/// - If the runqueue is non-empty and the flag is `UpdateFlags::WAIT`,
172///   `update_current` should also return `true`,
173///   because the current task is about to block.
174/// - In other cases, the return value depends on the scheduler's prioritization policy.
175///   For instance, a real-time task may only be preempted by a higher-priority task
176///   or if it explicitly yields.
177///   A normal task under Linux's CFS may be preempted by a task with smaller vruntime,
178///   but never by the idle task.
179///
180/// When OSTD is unsure about whether the current task should or can be replaced,
181/// it will invoke [`LocalRunQueue::try_pick_next`], the fallible version of `pick_next`.
182///
183/// ## Internal Working
184///
185/// To guide scheduler implementers,
186/// we provide a simplified view of how OSTD interacts with local runqueues _internally_
187/// in order to handle the four scheduling events.
188///
189/// ### Yielding
190///
191/// ```
192/// # use ostd::prelude::*;
193/// # use ostd::task::{*, scheduler::*};
194/// #
195/// # fn switch_to(next: Arc<Task>) {}
196/// #
197/// /// Yields the current task.
198/// fn yield(scheduler: &'static dyn Scheduler) {
199///     let next_task_opt: Option<Arc<Task>> = scheduler.mut_local_rq_with(|local_rq| {
200///         let should_pick_next = local_rq.update_current(UpdateFlags::Yield);
201///         should_pick_next.then(|| local_rq.pick_next().clone())
202///     });
203///     let Some(next_task) = next_task_opt {
204///         switch_to(next_task);
205///     }
206/// }
207/// ```
208///
209/// ### Sleeping
210///
211/// ```
212/// # use ostd::prelude::*;
213/// # use ostd::task::{*, scheduler::*};
214/// #
215/// # fn switch_to(next: Arc<Task>) {}
216/// #
217/// /// Puts the current task to sleep.
218/// ///
219/// /// The function takes a closure to check if the task is woken.
220/// /// This function is used internally to guard against race conditions,
221/// /// where the task is woken just before it goes to sleep.
222/// fn sleep<F: Fn() -> bool>(scheduler: &'static dyn Scheduler, is_woken: F) {
223///     let mut next_task_opt: Option<Arc<Task>> = None;
224///     let mut is_first_try = true;
225///     while scheduler.mut_local_rq_with(|local_rq| {
226///         if is_first_try {
227///             if is_woken() {
228///                 return false; // exit loop
229///             }
230///             is_first_try = false;
231///
232///             let should_pick_next = local_rq.update_current(UpdateFlags::Wait);
233///             let _current = local_rq.dequeue_current();
234///             if !should_pick_next {
235///                 return true; // continue loop
236///             }
237///             next_task_opt = Some(local_rq.pick_next().clone());
238///             false // exit loop
239///         } else {
240///             next_task_opt = local_rq.try_pick_next().cloned();
241///             next_task_opt.is_none()
242///         }
243///     }) {}
244///     let Some(next_task) = next_task_opt {
245///         switch_to(next_task);
246///     }
247/// }
248/// ```
249///
250/// ### Ticking
251///
252/// ```
253/// # use ostd::prelude::*;
254/// # use ostd::task::{*, scheduler::*};
255/// #
256/// # fn switch_to(next: Arc<Task>) {}
257/// # mod cpu_local {
258/// #     fn set_need_preempt();
259/// #     fn should_preempt() -> bool;
260/// # }
261/// #
262/// /// A callback to be invoked periodically by the timer interrupt.
263/// fn on_tick(scheduler: &'static dyn Scheduler) {
264///     scheduler.mut_local_rq_with(|local_rq| {
265///         let should_pick_next = local_rq.update_current(UpdateFlags::Tick);
266///         if should_pick_next {
267///             cpu_local::set_need_preempt();
268///         }
269///     });
270/// }
271///
272/// /// A preemption point, called at an earliest convenient timing
273/// /// when OSTD can safely preempt the current running task.
274/// fn might_preempt(scheduler: &'static dyn Scheduler) {
275///     if !cpu_local::should_preempt() {
276///         return;
277///     }
278///     let next_task_opt: Option<Arc<Task>> = scheduler
279///         .mut_local_rq_with(|local_rq| local_rq.try_pick_next().cloned())
280///     let Some(next_task) = next_task_opt {
281///         switch_to(next_task);
282///     }
283/// }
284/// ```
285///
286/// ### Exiting
287///
288/// ```
289/// # use ostd::prelude::*;
290/// # use ostd::task::{*, scheduler::*};
291/// #
292/// # fn switch_to(next: Arc<Task>) {}
293/// #
294/// /// Exits the current task.
295/// fn exit(scheduler: &'static dyn Scheduler) {
296///     let mut next_task_opt: Option<Arc<Task>> = None;
297///     let mut is_first_try = true;
298///     while scheduler.mut_local_rq_with(|local_rq| {
299///         if is_first_try {
300///             is_first_try = false;
301///             let should_pick_next = local_rq.update_current(UpdateFlags::Exit);
302///             let _current = local_rq.dequeue_current();
303///             if !should_pick_next {
304///                 return true; // continue loop
305///             }
306///             next_task_opt = Some(local_rq.pick_next().clone());
307///             false // exit loop
308///         } else {
309///             next_task_opt = local_rq.try_pick_next().cloned();
310///             next_task_opt.is_none()
311///         }
312///     }) {}
313///     let next_task = next_task_opt.unwrap();
314///     switch_to(next_task);
315/// }
316/// ```
317pub trait LocalRunQueue<T = Task> {
318    /// Gets the current runnable task.
319    fn current(&self) -> Option<&Arc<T>>;
320
321    /// Updates the current runnable task's scheduling statistics and
322    /// potentially its position in the runqueue.
323    ///
324    /// The return value of this method indicates whether an invocation of `pick_next` should be followed
325    /// to find another task to replace the current one.
326    #[must_use]
327    fn update_current(&mut self, flags: UpdateFlags) -> bool;
328
329    /// Picks the next runnable task.
330    ///
331    /// This method instructs the local runqueue to pick the next runnable task and replace the current one.
332    /// A reference to the new "current" task will be returned by this method.
333    /// If the "old" current task presents, then it is still runnable and thus remains in the runqueue.
334    ///
335    /// # Panics
336    ///
337    /// As explained in the type-level Rust doc,
338    /// this method will only be invoked by OSTD after a call to `update_current` returns true.
339    /// In case that this contract is broken by the caller,
340    /// the implementer is free to exhibit any undesirable or incorrect behaviors, include panicking.
341    fn pick_next(&mut self) -> &Arc<T> {
342        self.try_pick_next().unwrap()
343    }
344
345    /// Tries to pick the next runnable task.
346    ///
347    /// This method instructs the local runqueue to pick the next runnable task on a best-effort basis.
348    /// If such a task can be picked, then this task supersedes the current task and
349    /// the new the method returns a reference to the new "current" task.
350    /// If the "old" current task presents, then it is still runnable and thus remains in the runqueue.
351    fn try_pick_next(&mut self) -> Option<&Arc<T>>;
352
353    /// Removes the current runnable task from runqueue.
354    ///
355    /// This method returns the current runnable task.
356    /// If there is no current runnable task, this method returns `None`.
357    fn dequeue_current(&mut self) -> Option<Arc<T>>;
358}
359
360/// Possible triggers of an `enqueue` action.
361#[derive(PartialEq, Copy, Clone)]
362pub enum EnqueueFlags {
363    /// Spawn a new task.
364    Spawn,
365    /// Wake a sleeping task.
366    Wake,
367}
368
369/// Possible triggers of an `update_current` action.
370#[derive(PartialEq, Copy, Clone)]
371pub enum UpdateFlags {
372    /// Timer interrupt.
373    Tick,
374    /// Task waiting.
375    Wait,
376    /// Task yielding.
377    Yield,
378    /// Task exiting.
379    Exit,
380}
381
382/// Preempts the current task.
383#[track_caller]
384pub(crate) fn might_preempt() {
385    /*
386    if !cpu_local::should_preempt() {
387        return;
388    }
389    reschedule(|local_rq| {
390        let next_task_opt = local_rq.try_pick_next();
391        if let Some(next_task) = next_task_opt {
392            ReschedAction::SwitchTo(next_task.clone())
393        } else {
394            ReschedAction::DoNothing
395        }
396    })
397    */
398}
399
400/// Blocks the current task unless `has_unparked()` returns `true`.
401///
402/// Note that this method may return due to spurious wake events. It's the caller's responsibility
403/// to detect them (if necessary).
404#[track_caller]
405pub(crate) fn park_current<F>(has_unparked: F)
406where
407    F: Fn() -> bool,
408{
409    let mut current = None;
410    let mut is_first_try = true;
411
412    reschedule(|local_rq: &mut dyn LocalRunQueue| {
413        let next_task_opt = if is_first_try {
414            if has_unparked() {
415                return ReschedAction::DoNothing;
416            }
417            is_first_try = false;
418
419            // Note the race conditions: the current task may be woken after the above `has_unparked`
420            // check, but before the below `dequeue_current` action, we need to make sure that the
421            // wakeup event isn't lost.
422            //
423            // Currently, for the FIFO and CFS scheduler, `Scheduler::enqueue` will try to lock `local_rq`
424            // when the above race condition occurs, so it will wait until we finish calling the
425            // `dequeue_current` method and nothing bad will happen. This may need to be revisited
426            // after more complex schedulers are introduced.
427
428            let should_pick_next = local_rq.update_current(UpdateFlags::Wait);
429            current = local_rq.dequeue_current();
430            should_pick_next.then(|| local_rq.pick_next())
431        } else {
432            local_rq.try_pick_next()
433        };
434
435        if let Some(next_task) = next_task_opt {
436            if Arc::ptr_eq(current.as_ref().unwrap(), next_task) {
437                // The current task has been woken and picked as the next runnable task.
438                return ReschedAction::DoNothing;
439            }
440            return ReschedAction::SwitchTo(next_task.clone());
441        }
442
443        ReschedAction::Retry
444    });
445}
446
447/// Unblocks a target task.
448pub(crate) fn unpark_target(runnable: Arc<Task>) {
449    let preempt_cpu = SCHEDULER
450        .get()
451        .unwrap()
452        .enqueue(runnable, EnqueueFlags::Wake);
453    if let Some(preempt_cpu_id) = preempt_cpu {
454        // set_need_preempt(preempt_cpu_id);
455    }
456}
457
458/// Enqueues a newly built task.
459///
460/// Note that the new task is not guaranteed to run at once.
461/*
462#[track_caller]
463pub(super) fn run_new_task(runnable: Arc<Task>) {
464    // FIXME: remove this check for `SCHEDULER`.
465    // Currently OSTD cannot know whether its user has injected a scheduler.
466    if !SCHEDULER.is_completed() {
467        fifo_scheduler::init();
468    }
469
470    let preempt_cpu = SCHEDULER
471        .get()
472        .unwrap()
473        .enqueue(runnable, EnqueueFlags::Spawn);
474    if let Some(preempt_cpu_id) = preempt_cpu {
475        set_need_preempt(preempt_cpu_id);
476    }
477
478    might_preempt();
479}
480*/
481
482/*
483fn set_need_preempt(cpu_id: CpuId) {
484    let preempt_guard = disable_preempt();
485
486    if preempt_guard.current_cpu() == cpu_id {
487        cpu_local::set_need_preempt();
488    } else {
489        crate::smp::inter_processor_call(&CpuSet::from(cpu_id), || {
490            cpu_local::set_need_preempt();
491        });
492    }
493}
494*/
495
496/// Dequeues the current task from its runqueue.
497///
498/// This should only be called if the current is to exit.
499/*
500#[track_caller]
501pub(super) fn exit_current() -> ! {
502    let mut is_first_try = true;
503
504    reschedule(|local_rq: &mut dyn LocalRunQueue| {
505        let next_task_opt = if is_first_try {
506            is_first_try = false;
507            let should_pick_next = local_rq.update_current(UpdateFlags::Exit);
508            let _current = local_rq.dequeue_current();
509            should_pick_next.then(|| local_rq.pick_next())
510        } else {
511            local_rq.try_pick_next()
512        };
513
514        if let Some(next_task) = next_task_opt {
515            ReschedAction::SwitchTo(next_task.clone())
516        } else {
517            ReschedAction::Retry
518        }
519    });
520
521    unreachable!()
522}
523*/
524
525/// Yields execution.
526/*
527#[track_caller]
528pub(super) fn yield_now() {
529    reschedule(|local_rq| {
530        let should_pick_next = local_rq.update_current(UpdateFlags::Yield);
531        let next_task_opt = should_pick_next.then(|| local_rq.pick_next());
532        if let Some(next_task) = next_task_opt {
533            ReschedAction::SwitchTo(next_task.clone())
534        } else {
535            ReschedAction::DoNothing
536        }
537    })
538}
539*/
540
541/// Do rescheduling by acting on the scheduling decision (`ReschedAction`) made by a
542/// user-given closure.
543///
544/// The closure makes the scheduling decision by taking the local runqueue has its input.
545#[track_caller]
546fn reschedule<F>(mut f: F)
547where
548    F: FnMut(&mut dyn LocalRunQueue) -> ReschedAction,
549{
550    // Even if the decision below is `DoNothing`, we should clear this flag. Meanwhile, to avoid
551    // race conditions, we should do this before making the decision.
552    // cpu_local::clear_need_preempt();
553
554    let next_task = loop {
555        let mut action = ReschedAction::DoNothing;
556        SCHEDULER.get().unwrap().mut_local_rq_with(&mut |rq| {
557            action = f(rq);
558        });
559
560        match action {
561            ReschedAction::DoNothing => {
562                return;
563            }
564            ReschedAction::Retry => {
565                continue;
566            }
567            ReschedAction::SwitchTo(next_task) => {
568                break next_task;
569            }
570        };
571    };
572
573    // `switch_to_task` will spin if it finds that the next task is still running on some CPU core,
574    // which guarantees soundness regardless of the scheduler implementation.
575    //
576    // FIXME: The scheduler decision and context switching are not atomic, which can lead to some
577    // strange behavior even if the scheduler is implemented correctly. See "Problem 2" at
578    // <https://github.com/asterinas/asterinas/issues/1633> for details.
579    // processor::switch_to_task(next_task);
580}
581
582/// Possible actions of a rescheduling.
583enum ReschedAction {
584    /// Keep running current task and do nothing.
585    DoNothing,
586    /// Loop until finding a task to swap out the current.
587    Retry,
588    /// Switch to target task.
589    SwitchTo(Arc<Task>),
590}