monitord/
verify.rs

//! # verify module
//!
//! Collects systemd unit verification errors by running `systemd-analyze verify`
//! on the units reported by systemd (after allowlist/blocklist filtering) and parsing
//! the command's stderr. Tracks counts of failing units by unit type.
5
6use std::collections::{HashMap, HashSet};
7use std::process::Command;
8use std::sync::Arc;
9
10use thiserror::Error;
11use tokio::sync::RwLock;
12
13use crate::MachineStats;
14
15#[derive(Error, Debug)]
16pub enum MonitordVerifyError {
17    #[error("Failed to execute systemd-analyze: {0}")]
18    CommandError(String),
19    #[error("Unable to connect to D-Bus via zbus: {0:#}")]
20    ZbusError(#[from] zbus::Error),
21}
22
23/// Statistics about unit verification errors, aggregated by unit type (service, slice, timer, etc.)
24#[derive(serde::Serialize, serde::Deserialize, Clone, Debug, Default, Eq, PartialEq)]
25pub struct VerifyStats {
26    /// Total count of units with verification failures
27    pub total: u64,
28    /// Count of failing units per type (e.g., "service", "slice", "timer")
29    /// Only includes types that have at least one failure
30    #[serde(flatten)]
31    pub by_type: HashMap<String, u64>,
32}
33
/// Extract the unit type from a unit name (e.g., "foo.service" -> "service").
///
/// The type is the text after the last `.` in `unit_name`. A name that contains no
/// `.` at all is returned whole, so callers that need a real suffix must check for
/// the dot themselves.
///
/// Returns `None` when:
/// - the name is shorter than three bytes,
/// - the first character is not alphanumeric, `-`, `_` or `\` (this rejects
///   absolute paths, quoted words and similar non-unit tokens), or
/// - the name ends in `.`, which would yield an empty type.
fn get_unit_type(unit_name: &str) -> Option<String> {
    // Filter out obviously invalid unit names
    if unit_name.len() < 3 {
        return None;
    }

    // A unit name starts with an alphanumeric character, a dash, an underscore,
    // or a backslash (used by systemd escapes such as `\x2d`).
    let first_char = unit_name.chars().next()?;
    if !(first_char.is_alphanumeric() || matches!(first_char, '-' | '_' | '\\')) {
        return None;
    }

    // `rsplit` always yields at least one piece; an empty piece means the name
    // ended with a dot and therefore has no usable type.
    unit_name
        .rsplit('.')
        .next()
        .filter(|suffix| !suffix.is_empty())
        .map(String::from)
}
49
50/// Parse systemd-analyze verify output to extract failing unit names
51/// Output format examples:
52/// - "Unit foo.service not found."
53/// - "/path/to/foo.service:5: Unknown key..."
54/// - "foo.service: Command ... failed..."
55fn parse_verify_output(stderr: &str) -> HashSet<String> {
56    let mut failing_units = HashSet::new();
57
58    for line in stderr.lines() {
59        let trimmed = line.trim();
60        if trimmed.is_empty() {
61            continue;
62        }
63
64        // Skip "Failed to prepare filename" lines - these are input errors, not unit errors
65        if trimmed.contains("Failed to prepare filename") {
66            continue;
67        }
68
69        let mut found_in_line = false;
70
71        // Format 1: "/path/file.service:line: message" - extract just the filename
72        if line.starts_with('/') {
73            if let Some(pos) = line.find(':') {
74                let path_part = &line[..pos];
75                if let Some(filename) = path_part.rsplit('/').next() {
76                    if filename.contains('.') && get_unit_type(filename).is_some() {
77                        failing_units.insert(filename.to_string());
78                        found_in_line = true;
79                    }
80                }
81            }
82        }
83
84        // Format 2: "Unit foo.service ..." or "foo.service: ..." - only if not already found from path
85        if !found_in_line {
86            for word in line.split_whitespace() {
87                let cleaned = word.trim_end_matches(':').trim_end_matches('.');
88                // Only consider it a unit name if it has a valid extension and looks reasonable
89                if cleaned.contains('.')
90                    && cleaned.len() > 2 // Minimum reasonable length
91                    && !cleaned.contains('(') // Skip things like "foo(8)"
92                    && get_unit_type(cleaned).is_some()
93                {
94                    failing_units.insert(cleaned.to_string());
95                    break; // Only take first unit name per line
96                }
97            }
98        }
99    }
100
101    failing_units
102}
103
104/// Collect verification stats for all units in the system
105pub async fn get_verify_stats(
106    connection: &zbus::Connection,
107    allowlist: &HashSet<String>,
108    blocklist: &HashSet<String>,
109) -> Result<VerifyStats, MonitordVerifyError> {
110    let mut stats = VerifyStats::default();
111
112    // Get list of all units from systemd
113    let manager_proxy = crate::dbus::zbus_systemd::ManagerProxy::new(connection).await?;
114    let all_units = manager_proxy.list_units().await?;
115
116    // Filter units based on allowlist/blocklist
117    let units_to_check: Vec<String> = all_units
118        .into_iter()
119        .map(|unit| unit.0)
120        .filter(|unit_name| {
121            // Apply allowlist
122            if !allowlist.is_empty() && !allowlist.contains(unit_name) {
123                return false;
124            }
125            // Apply blocklist
126            if blocklist.contains(unit_name) {
127                return false;
128            }
129            true
130        })
131        .collect();
132
133    if units_to_check.is_empty() {
134        return Ok(stats);
135    }
136
137    // Run systemd-analyze verify on all units at once for better performance
138    let output = tokio::task::spawn_blocking(move || {
139        let mut cmd = Command::new("systemd-analyze");
140        cmd.arg("verify");
141        for unit_name in &units_to_check {
142            cmd.arg(unit_name);
143        }
144        cmd.output()
145    })
146    .await
147    .map_err(|e| MonitordVerifyError::CommandError(e.to_string()))?
148    .map_err(|e| MonitordVerifyError::CommandError(e.to_string()))?;
149
150    // Parse stderr for failing units
151    let stderr = String::from_utf8_lossy(&output.stderr);
152    let failing_units = parse_verify_output(&stderr);
153
154    // Count failures by type
155    for unit_name in failing_units {
156        stats.total += 1;
157
158        if let Some(unit_type) = get_unit_type(&unit_name) {
159            *stats.by_type.entry(unit_type).or_insert(0) += 1;
160        }
161    }
162
163    Ok(stats)
164}
165
166/// Async wrapper that updates verify stats when passed a locked struct
167pub async fn update_verify_stats(
168    connection: zbus::Connection,
169    locked_machine_stats: Arc<RwLock<MachineStats>>,
170    allowlist: HashSet<String>,
171    blocklist: HashSet<String>,
172) -> anyhow::Result<()> {
173    let verify_stats = get_verify_stats(&connection, &allowlist, &blocklist)
174        .await
175        .map_err(|e| anyhow::anyhow!("Error getting verify stats: {:?}", e))?;
176
177    let mut machine_stats = locked_machine_stats.write().await;
178    machine_stats.verify_stats = Some(verify_stats);
179    Ok(())
180}
181
#[cfg(test)]
mod tests {
    use super::*;

    /// The type is the text after the last dot; a dot-less name comes back whole.
    #[test]
    fn test_get_unit_type() {
        let cases = [
            ("foo.service", "service"),
            ("bar.slice", "slice"),
            ("baz.timer", "timer"),
            ("test", "test"),
        ];
        for &(name, expected) in &cases {
            assert_eq!(get_unit_type(name), Some(expected.to_string()));
        }
    }

    /// Default stats are zeroed and carry no per-type entries.
    #[test]
    fn test_verify_stats_default() {
        let VerifyStats { total, by_type } = VerifyStats::default();
        assert_eq!(total, 0);
        assert_eq!(by_type.len(), 0);
    }

    /// One unit is extracted from each of the supported line formats.
    #[test]
    fn test_parse_verify_output() {
        let stderr = r#"
/usr/lib/systemd/system/foo.service:4: Unknown section 'Service'. Ignoring.
bar.slice: Command /bin/foo is not executable: No such file or directory
Unit baz.timer not found.
test-with-error.target: Some error message here
"#;
        let failing = parse_verify_output(stderr);

        // Print what was found, in a stable order, to ease debugging on failure
        let mut sorted: Vec<_> = failing.iter().collect();
        sorted.sort();
        for unit in &sorted {
            eprintln!("Found unit: {}", unit);
        }

        let expected_units = [
            "foo.service",
            "bar.slice",
            "baz.timer",
            "test-with-error.target",
        ];
        for unit in &expected_units {
            assert!(failing.contains(*unit));
        }
        assert_eq!(failing.len(), 4);
    }
}