monitord/
boot.rs

1//! # boot module
2//!
3//! Collects boot blame metrics showing the slowest units at boot.
4//! Similar to `systemd-analyze blame` but stores N slowest units.
5
6use std::collections::HashMap;
7use std::sync::Arc;
8
9use anyhow::Result;
10use tokio::sync::RwLock;
11use tracing::debug;
12use zbus::zvariant::ObjectPath;
13
14use crate::config::Config;
15use crate::dbus::zbus_systemd::ManagerProxy;
16use crate::dbus::zbus_unit::UnitProxy;
17use crate::MachineStats;
18
19/// Boot blame statistics: maps unit name to activation time in seconds
20pub type BootBlameStats = HashMap<String, f64>;
21
22/// Calculate the activation time for a unit
23/// Returns the time in seconds from InactiveExitTimestamp to ActiveEnterTimestamp
24async fn get_unit_activation_time(
25    connection: &zbus::Connection,
26    unit_path: &ObjectPath<'_>,
27) -> Result<f64> {
28    let unit_proxy = UnitProxy::builder(connection)
29        .path(unit_path)?
30        .build()
31        .await?;
32
33    let inactive_exit = unit_proxy.inactive_exit_timestamp().await?;
34    let active_enter = unit_proxy.active_enter_timestamp().await?;
35
36    // If either timestamp is 0, the unit hasn't been activated or the timing is invalid
37    if inactive_exit == 0 || active_enter == 0 {
38        return Ok(0.0);
39    }
40
41    // Calculate activation time in seconds (timestamps are in microseconds)
42    let activation_time_usec = active_enter.saturating_sub(inactive_exit);
43    let activation_time_sec = activation_time_usec as f64 / 1_000_000.0;
44
45    Ok(activation_time_sec)
46}
47
48/// Update boot blame statistics with the N slowest units at boot
49pub async fn update_boot_blame_stats(
50    config: Arc<Config>,
51    connection: zbus::Connection,
52    machine_stats: Arc<RwLock<MachineStats>>,
53) -> Result<()> {
54    debug!("Starting boot blame stats collection");
55
56    let systemd_proxy = ManagerProxy::new(&connection).await?;
57    let units = systemd_proxy.list_units().await?;
58
59    let mut unit_times: Vec<(String, f64)> = Vec::new();
60
61    // Collect activation times for all units
62    for unit_info in units {
63        let unit_name = unit_info.0;
64        let unit_path = unit_info.6;
65
66        // Apply blocklist: skip units explicitly excluded
67        if config.boot_blame.blocklist.contains(&unit_name) {
68            debug!("Skipping boot blame for {} due to blocklist", &unit_name);
69            continue;
70        }
71        // Apply allowlist: if non-empty, only include listed units
72        if !config.boot_blame.allowlist.is_empty()
73            && !config.boot_blame.allowlist.contains(&unit_name)
74        {
75            continue;
76        }
77
78        match get_unit_activation_time(&connection, &unit_path).await {
79            Ok(time) if time > 0.0 => {
80                unit_times.push((unit_name, time));
81            }
82            Ok(_) => {
83                // Unit has no activation time (0.0), skip it
84            }
85            Err(e) => {
86                debug!("Failed to get activation time for {}: {}", unit_name, e);
87            }
88        }
89    }
90
91    // Sort by activation time in descending order (slowest first)
92    unit_times.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
93
94    // Take only the N slowest units
95    let num_slowest = config.boot_blame.num_slowest_units as usize;
96    unit_times.truncate(num_slowest);
97
98    // Convert to HashMap
99    let boot_blame_stats: BootBlameStats = unit_times.into_iter().collect();
100
101    debug!("Collected {} boot blame stats", boot_blame_stats.len());
102
103    // Update machine stats
104    let mut stats = machine_stats.write().await;
105    stats.boot_blame = Some(boot_blame_stats);
106
107    Ok(())
108}