monitord/
pid1.rs

1//! # pid1 module
2//!
3//! `pid1` uses procfs to get some statistics on Linux's most important
4//! process, pid 1. These metrics can help ensure newer systemds don't regress
5//! or show strange behavior, e.g. more file descriptors without more units.
6
7use std::sync::Arc;
8
9#[cfg(target_os = "linux")]
10use procfs::process::Process;
11use thiserror::Error;
12use tokio::sync::RwLock;
13use tracing::error;
14
15use crate::MachineStats;
16
17#[derive(Error, Debug)]
18pub enum MonitordPid1Error {
19    #[cfg(target_os = "linux")]
20    #[error("Procfs error: {0}")]
21    ProcfsError(#[from] procfs::ProcError),
22    #[error("Integer conversion error: {0}")]
23    IntConversion(#[from] std::num::TryFromIntError),
24}
25
26/// Process-level statistics for PID 1 (systemd) read from procfs.
27/// These metrics help detect regressions or anomalies in the init process itself.
28/// Ref: <https://manpages.debian.org/buster/manpages/procfs.5.en.html>
29#[derive(serde::Serialize, serde::Deserialize, Clone, Debug, Default, Eq, PartialEq)]
30pub struct Pid1Stats {
31    /// CPU time spent in kernel mode by PID 1, in seconds (from /proc/1/stat stime, converted from ticks)
32    pub cpu_time_kernel: u64,
33    /// CPU time spent in user mode by PID 1, in seconds (from /proc/1/stat utime, converted from ticks)
34    pub cpu_time_user: u64,
35    /// Resident set size of PID 1 in bytes (from /proc/1/stat rss, converted from pages)
36    pub memory_usage_bytes: u64,
37    /// Number of open file descriptors held by PID 1 (from /proc/1/fd/)
38    pub fd_count: u64,
39    /// Number of threads/tasks belonging to PID 1 (from /proc/1/task/)
40    pub tasks: u64,
41}
42
43/// Get procfs info on pid 1 - <https://manpages.debian.org/buster/manpages/procfs.5.en.html>
44#[cfg(target_os = "linux")]
45pub fn get_pid_stats(pid: i32) -> Result<Pid1Stats, MonitordPid1Error> {
46    let bytes_per_page = procfs::page_size();
47    let ticks_per_second = procfs::ticks_per_second();
48
49    let pid1_proc = Process::new(pid)?;
50    let stat_file = pid1_proc.stat()?;
51
52    // Living with integer rounding
53    Ok(Pid1Stats {
54        cpu_time_kernel: (stat_file.stime) / (ticks_per_second),
55        cpu_time_user: (stat_file.utime) / (ticks_per_second),
56        memory_usage_bytes: (stat_file.rss) * (bytes_per_page),
57        fd_count: pid1_proc.fd_count()?.try_into()?,
58        // Using 0 as impossible number of tasks
59        tasks: pid1_proc
60            .tasks()?
61            .flatten()
62            .collect::<Vec<_>>()
63            .len()
64            .try_into()?,
65    })
66}
67
/// Fallback for non-Linux targets: logs that collection is unsupported and
/// returns a default (all-zero) [`Pid1Stats`].
#[cfg(not(target_os = "linux"))]
pub fn get_pid_stats(_pid: i32) -> Result<Pid1Stats, MonitordPid1Error> {
    let placeholder = Pid1Stats::default();
    error!("pid1 stats not supported on this OS");
    Ok(placeholder)
}
73
74/// Async wrapper than can update PID1 stats when passed a locked struct
75pub async fn update_pid1_stats(
76    pid: i32,
77    locked_machine_stats: Arc<RwLock<MachineStats>>,
78) -> anyhow::Result<()> {
79    let pid1_stats = match tokio::task::spawn_blocking(move || get_pid_stats(pid)).await {
80        Ok(p1s) => p1s,
81        Err(err) => return Err(err.into()),
82    };
83
84    let mut machine_stats = locked_machine_stats.write().await;
85    machine_stats.pid1 = match pid1_stats {
86        Ok(s) => Some(s),
87        Err(err) => {
88            error!("Unable to set pid1 stats: {:?}", err);
89            None
90        }
91    };
92
93    Ok(())
94}
95
#[cfg(all(test, target_os = "linux"))]
pub mod tests {
    use super::*;

    /// Smoke test: stats for PID 1 can be collected and report at least one task.
    // NOTE(review): reading /proc/1/fd may need elevated privileges — confirm
    // the environment this test is expected to run in.
    #[test]
    pub fn test_get_stats() -> Result<(), MonitordPid1Error> {
        let task_count = get_pid_stats(1)?.tasks;
        assert!(task_count > 0);
        Ok(())
    }
}
107}