#!/usr/bin/env python3
"""
Health Check Script - Monitor the monitoring service.

Can be used with cron or external monitoring tools.

Exit codes:
    0 - every check passed
    1 - exactly one check failed (minor issues)
    2 - two or more checks failed (critical issues)
"""

import os
import socket
import subprocess
import sys
from datetime import datetime, timedelta, timezone
from pathlib import Path

LOG_FILE = "/var/log/pikvm-monitor.log"
STATE_FILE = "/var/lib/pikvm-monitor/state.txt"


def check_log_recent(log_file=None, max_age=timedelta(minutes=10)):
    """Check whether the monitor log was modified recently.

    Args:
        log_file: Path of the log file to inspect. Defaults to ``LOG_FILE``.
        max_age: Maximum allowed time since the last modification
            (10 minutes by default).

    Returns:
        True if the file exists and its modification time is no older than
        ``max_age``; False if it is missing, stale, or cannot be inspected.
    """
    path = Path(LOG_FILE if log_file is None else log_file)
    try:
        mtime = path.stat().st_mtime
        # Compare timezone-aware UTC instants: subtracting naive local
        # datetimes is off by an hour across a DST transition.
        last_update = datetime.fromtimestamp(mtime, tz=timezone.utc)
        age = datetime.now(tz=timezone.utc) - last_update
    except FileNotFoundError:
        print(f"✗ Log file not found: {path}")
        return False
    except (OSError, OverflowError, ValueError) as e:
        print(f"✗ Error checking log: {e}")
        return False

    seconds = age.total_seconds()
    if age > max_age:
        print(f"✗ Log file not updated recently (last: {seconds:.0f}s ago)")
        return False

    print(f"✓ Log file updated {seconds:.0f}s ago")
    return True


def check_container_running():
    """Check if a Docker container named like ``pikvm-monitor`` is running.

    Returns:
        True if at least one container whose name contains "pikvm-monitor"
        has status "running". False if matching containers exist but none is
        running, or if no container matches. Also True (with a warning) when
        Docker cannot be queried at all, so that non-Docker deployments do
        not fail this check.
    """
    try:
        # Optional third-party dependency: imported lazily so the script
        # still works on hosts without the Docker SDK installed.
        import docker

        client = docker.from_env()
        # all=True includes stopped containers; the default listing omits
        # them, so an exited monitor would be reported as "not found".
        matches = [
            (container.name, container.status)
            for container in client.containers.list(all=True)
            if "pikvm-monitor" in container.name
        ]
    except Exception as e:
        print(f"⚠ Could not check container (not using Docker?): {e}")
        return True  # Don't fail if not using Docker

    if not matches:
        print("✗ Container pikvm-monitor not found")
        return False

    # A running match wins even if stale stopped containers share the name.
    for name, status in matches:
        if status == "running":
            print(f"✓ Container {name} is running")
            return True

    for name, status in matches:
        print(f"✗ Container {name} is {status}")
    return False


def check_process_running():
    """Check if Python monitor process is running.

    Runs ``pgrep -f monitor.py`` and treats exit status 0 as "running".

    Returns:
        True if pgrep found a match, False if it did not. Also True (with a
        warning) when pgrep itself could not be executed.
    """
    try:
        result = subprocess.run(
            ["pgrep", "-f", "monitor.py"],
            capture_output=True,
        )
    except (OSError, subprocess.SubprocessError) as e:
        print(f"⚠ Could not check process: {e}")
        return True

    if result.returncode == 0:
        print("✓ Monitor process is running")
        return True

    print("✗ Monitor process not running")
    return False


def check_network(host="8.8.8.8", port=53, timeout=3):
    """Check basic network connectivity by opening a TCP connection.

    Args:
        host: Host to connect to (default "8.8.8.8").
        port: TCP port to connect to (default 53).
        timeout: Connection timeout in seconds (default 3).

    Returns:
        True if the connection was established, False on any socket error
        (refused, unreachable, timed out, ...).
    """
    try:
        # The context manager closes the probe socket immediately.
        with socket.create_connection((host, port), timeout=timeout):
            pass
    except OSError:
        # socket.timeout and connection errors are all OSError subclasses;
        # KeyboardInterrupt/SystemExit are deliberately not swallowed.
        print("✗ Network connectivity issue")
        return False

    print("✓ Network connectivity OK")
    return True


def main():
    """Run every health check, print a summary and exit with a status code.

    Exits with 0 when all checks pass, 1 when at most one check fails, and
    2 otherwise. A check that raises is counted as failed.
    """
    print("=" * 60)
    print("PiKVM Monitor Health Check")
    print("=" * 60)
    print()

    checks = [
        ("Process Running", check_process_running),
        ("Container Running", check_container_running),
        ("Log File Recent", check_log_recent),
        ("Network Connectivity", check_network),
    ]

    results = []
    for name, check_func in checks:
        print(f"Checking: {name}...")
        try:
            results.append(check_func())
        except Exception as e:
            # Top-level boundary: one broken check must not abort the rest.
            print(f"✗ Check failed: {e}")
            results.append(False)
        print()

    passed = sum(results)
    total = len(results)

    print("=" * 60)
    print(f"Results: {passed}/{total} checks passed")

    if passed == total:
        print("✓ Monitor is healthy")
        sys.exit(0)
    elif passed >= total - 1:
        print("⚠ Monitor has minor issues")
        sys.exit(1)
    else:
        print("✗ Monitor has critical issues")
        sys.exit(2)


if __name__ == "__main__":
    main()