monitord/
boot.rs

1//! # boot module
2//!
3//! Collects boot blame metrics showing the slowest units at boot.
4//! Similar to `systemd-analyze blame` but stores N slowest units.
5
6use std::collections::HashMap;
7use std::sync::Arc;
8
9use anyhow::Result;
10use tokio::sync::RwLock;
11use tracing::debug;
12use zbus::zvariant::ObjectPath;
13
14use crate::config::Config;
15use crate::dbus::zbus_systemd::ManagerProxy;
16use crate::dbus::zbus_unit::UnitProxy;
17use crate::MachineStats;
18
/// Boot blame statistics.
///
/// Maps a unit name to its activation time in seconds.
pub type BootBlameStats = HashMap<String, f64>;
21
22/// Calculate the activation time for a unit
23/// Returns the time in seconds from InactiveExitTimestamp to ActiveEnterTimestamp
24async fn get_unit_activation_time(
25    connection: &zbus::Connection,
26    unit_path: &ObjectPath<'_>,
27) -> Result<f64> {
28    let unit_proxy = UnitProxy::builder(connection)
29        .cache_properties(zbus::proxy::CacheProperties::No)
30        .path(unit_path)?
31        .build()
32        .await?;
33
34    let inactive_exit = unit_proxy.inactive_exit_timestamp().await?;
35    let active_enter = unit_proxy.active_enter_timestamp().await?;
36
37    // If either timestamp is 0, the unit hasn't been activated or the timing is invalid
38    if inactive_exit == 0 || active_enter == 0 {
39        return Ok(0.0);
40    }
41
42    // Calculate activation time in seconds (timestamps are in microseconds)
43    let activation_time_usec = active_enter.saturating_sub(inactive_exit);
44    let activation_time_sec = activation_time_usec as f64 / 1_000_000.0;
45
46    Ok(activation_time_sec)
47}
48
49/// Update boot blame statistics with the N slowest units at boot
50pub async fn update_boot_blame_stats(
51    config: Arc<Config>,
52    connection: zbus::Connection,
53    machine_stats: Arc<RwLock<MachineStats>>,
54) -> Result<()> {
55    debug!("Starting boot blame stats collection");
56
57    let systemd_proxy = ManagerProxy::builder(&connection)
58        .cache_properties(zbus::proxy::CacheProperties::No)
59        .build()
60        .await?;
61    let units = systemd_proxy.list_units().await?;
62
63    let mut unit_times: Vec<(String, f64)> = Vec::new();
64
65    // Collect activation times for all units
66    for unit_info in units {
67        let unit_name = unit_info.0;
68        let unit_path = unit_info.6;
69
70        // Apply blocklist: skip units explicitly excluded
71        if config.boot_blame.blocklist.contains(&unit_name) {
72            debug!("Skipping boot blame for {} due to blocklist", &unit_name);
73            continue;
74        }
75        // Apply allowlist: if non-empty, only include listed units
76        if !config.boot_blame.allowlist.is_empty()
77            && !config.boot_blame.allowlist.contains(&unit_name)
78        {
79            continue;
80        }
81
82        match get_unit_activation_time(&connection, &unit_path).await {
83            Ok(time) if time > 0.0 => {
84                unit_times.push((unit_name, time));
85            }
86            Ok(_) => {
87                // Unit has no activation time (0.0), skip it
88            }
89            Err(e) => {
90                debug!("Failed to get activation time for {}: {}", unit_name, e);
91            }
92        }
93    }
94
95    // Sort by activation time in descending order (slowest first)
96    unit_times.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
97
98    // Take only the N slowest units
99    let num_slowest = config.boot_blame.num_slowest_units as usize;
100    unit_times.truncate(num_slowest);
101
102    // Convert to HashMap
103    let boot_blame_stats: BootBlameStats = unit_times.into_iter().collect();
104
105    debug!("Collected {} boot blame stats", boot_blame_stats.len());
106
107    // Update machine stats
108    let mut stats = machine_stats.write().await;
109    stats.boot_blame = Some(boot_blame_stats);
110
111    Ok(())
112}