plex-restart/health_check.py
2026-04-12 13:03:45 -04:00

123 lines
3.6 KiB
Python

#!/usr/bin/env python3
"""
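Health Check Script - verify that the PiKVM monitor service itself is healthy.

Can be run from cron or by external monitoring tools. Exit status: 0 when every
check passes, 1 when exactly one check fails, 2 when more than one fails.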
"""
import os
import sys
from pathlib import Path
from datetime import datetime, timedelta
# Log file whose modification time check_log_recent() inspects.
LOG_FILE = "/var/log/pikvm-monitor.log"
# NOTE(review): not referenced anywhere in the visible part of this file;
# confirm whether it is still needed.
STATE_FILE = "/var/lib/pikvm-monitor/state.txt"
def check_log_recent(log_file=None, max_age_minutes=10):
    """Check whether the monitor log file has been modified recently.

    Args:
        log_file: Path of the log file to inspect. Defaults to the
            module-level ``LOG_FILE`` when omitted.
        max_age_minutes: Maximum allowed age of the last modification,
            in minutes (default 10).

    Returns:
        True if the file exists and was modified within the allowed window,
        False if it is missing, too old, or cannot be inspected.
    """
    import time

    target = LOG_FILE if log_file is None else log_file
    try:
        # A single stat() both detects a missing file and reads the mtime,
        # avoiding the race between a separate exists() check and stat().
        mtime = Path(target).stat().st_mtime
    except (FileNotFoundError, NotADirectoryError):
        print(f"✗ Log file not found: {target}")
        return False
    except OSError as e:
        print(f"✗ Error checking log: {e}")
        return False

    # Compare raw epoch seconds: subtracting naive local datetimes is off by
    # an hour whenever a DST transition falls between mtime and now.
    # Clamp at zero so a slightly-future mtime (clock skew) reads as "0s ago".
    age_seconds = max(0.0, time.time() - mtime)
    if age_seconds > max_age_minutes * 60:
        print(f"✗ Log file not updated recently (last: {age_seconds:.0f}s ago)")
        return False
    print(f"✓ Log file updated {age_seconds:.0f}s ago")
    return True
def check_container_running():
    """Check whether a ``pikvm-monitor`` Docker container is running.

    Returns:
        True if a container whose name contains "pikvm-monitor" is in the
        "running" state, or if Docker cannot be queried at all (the check is
        best-effort so that non-Docker deployments do not fail).
        False if matching containers exist but none is running, or if no
        matching container exists.
    """
    try:
        import docker

        client = docker.from_env()
        # all=True: the default listing only returns running containers,
        # which would make a stopped container look like a missing one.
        matches = [
            (container.name, container.status)
            for container in client.containers.list(all=True)
            if "pikvm-monitor" in container.name
        ]

        # Prefer a running match so a stale, stopped container with a
        # similar name does not mask the live one.
        for name, status in matches:
            if status == "running":
                print(f"✓ Container {name} is running")
                return True

        if matches:
            name, status = matches[0]
            print(f"✗ Container {name} is {status}")
            return False

        print("✗ Container pikvm-monitor not found")
        return False
    except Exception as e:
        print(f"⚠ Could not check container (not using Docker?): {e}")
        return True  # Don't fail if not using Docker
def check_process_running():
    """Report whether a process matching ``monitor.py`` is running.

    Runs ``pgrep -f monitor.py`` and treats exit status 0 as "running".

    Returns:
        True if pgrep exits with status 0, or if the check itself could not
        be carried out (a warning is printed and the check is not failed).
        False if pgrep exits with any other status.
    """
    try:
        import subprocess

        pgrep = subprocess.run(
            ["pgrep", "-f", "monitor.py"],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
        # Guard clause: any non-zero status is reported as "not running".
        if pgrep.returncode != 0:
            print("✗ Monitor process not running")
            return False
        print("✓ Monitor process is running")
        return True
    except Exception as err:
        print(f"⚠ Could not check process: {err}")
        return True
def check_network(host="8.8.8.8", port=53, timeout=3):
    """Check basic network connectivity by opening a TCP connection.

    Args:
        host: Host to connect to (default "8.8.8.8").
        port: TCP port to connect to (default 53).
        timeout: Connection timeout in seconds (default 3).

    Returns:
        True if the connection could be established, False otherwise.
    """
    import socket

    try:
        # The context manager closes the probe socket instead of leaking it.
        with socket.create_connection((host, port), timeout=timeout):
            pass
    except OSError:
        # OSError covers refused, unreachable, timeout and resolution
        # failures without swallowing KeyboardInterrupt or SystemExit.
        print("✗ Network connectivity issue")
        return False
    print("✓ Network connectivity OK")
    return True
def main():
    """Run every health check, print a summary, and exit with a status code.

    Exit status is 0 when all checks pass, 1 when at most one check fails,
    and 2 otherwise. A check that raises is counted as failed.
    """
    rule = "=" * 60
    print(rule)
    print("PiKVM Monitor Health Check")
    print(rule)
    print()

    probes = (
        ("Process Running", check_process_running),
        ("Container Running", check_container_running),
        ("Log File Recent", check_log_recent),
        ("Network Connectivity", check_network),
    )

    outcomes = []
    for label, probe in probes:
        print(f"Checking: {label}...")
        try:
            outcome = probe()
        except Exception as err:
            print(f"✗ Check failed: {err}")
            outcome = False
        outcomes.append(outcome)
        print()

    ok_count = sum(outcomes)
    check_count = len(outcomes)
    print(rule)
    print(f"Results: {ok_count}/{check_count} checks passed")

    # Pick the verdict first, then report and exit in one place.
    if ok_count == check_count:
        verdict, exit_code = "✓ Monitor is healthy", 0
    elif ok_count >= check_count - 1:
        verdict, exit_code = "⚠ Monitor has minor issues", 1
    else:
        verdict, exit_code = "✗ Monitor has critical issues", 2
    print(verdict)
    sys.exit(exit_code)
# Run the health check only when executed as a script, not when imported.
if __name__ == "__main__":
    main()