plex-restart/monitor.py
2026-04-12 13:03:45 -04:00

177 lines
6.0 KiB
Python

#!/usr/bin/env python3
"""
PiKVM Auto-Restart Monitor
Monitors host connectivity and performs hard reset if downtime exceeds threshold.
"""
import ping3
import time
import logging
import os
import sys
from datetime import datetime, timedelta
from pathlib import Path
try:
from gpiozero import Button
except ImportError:
# Fallback if gpiozero not available
import RPi.GPIO as GPIO
# Runtime settings. Every value can be overridden through the environment
# variable of the same (upper-case) name; the literals below are the defaults.
CONFIG = {
    # Addresses to probe
    "host_ip": os.environ.get("HOST_IP", "192.168.1.10"),
    "gateway_ip": os.environ.get("GATEWAY_IP", "192.168.1.1"),
    # Seconds between connectivity checks (default: 3 minutes)
    "ping_interval": int(os.environ.get("PING_INTERVAL", "180")),
    # Minutes of continuous downtime tolerated before a reset (default: 15)
    "downtime_threshold": int(os.environ.get("DOWNTIME_THRESHOLD", "15")),
    # BCM number of the GPIO wired to the host's power button
    "power_button_gpio": int(os.environ.get("POWER_BUTTON_GPIO", "17")),
    # Seconds to hold the button to force power-off / to power back on
    "long_press_duration": float(os.environ.get("LONG_PRESS_DURATION", "5")),
    "short_press_duration": float(os.environ.get("SHORT_PRESS_DURATION", "1")),
    # Seconds to wait between powering down and powering up
    "wait_before_reboot": int(os.environ.get("WAIT_BEFORE_REBOOT", "90")),
    # Destination of the monitor's own log
    "log_file": os.environ.get("LOG_FILE", "/var/log/pikvm-monitor.log"),
}
# Setup logging
# Path.parent is "." when LOG_FILE is a bare filename, so creating it is a
# harmless no-op; os.makedirs(os.path.dirname(...)) would be called with ""
# in that case and raise FileNotFoundError.
Path(CONFIG["log_file"]).parent.mkdir(parents=True, exist_ok=True)
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s",
    handlers=[
        # Log both to the configured file and to the console (stderr).
        logging.FileHandler(CONFIG["log_file"]),
        logging.StreamHandler(),
    ],
)
logger = logging.getLogger(__name__)

# State file to track reboots (one line is appended per completed reset)
STATE_FILE = "/var/lib/pikvm-monitor/state.txt"
Path(STATE_FILE).parent.mkdir(parents=True, exist_ok=True)
def press_power_button(duration):
    """Simulate a power button press by pulsing the configured GPIO high.

    The line is driven high for ``duration`` seconds, then driven low and
    released. The release happens even if the wait is interrupted, so the
    button is never left "held down".

    gpiozero is used when it can be imported; otherwise the RPi.GPIO module
    imported at file level is used.

    Args:
        duration: How long to hold the button, in seconds.

    Raises:
        Exception: Any GPIO error is logged and re-raised to the caller.
    """
    pin = CONFIG["power_button_gpio"]
    try:
        try:
            # A gpiozero Button is an *input* device, and drive_high() /
            # drive_low() only exist on gpiozero's MockPin, so a real pin
            # must be driven through an output device instead.
            from gpiozero import DigitalOutputDevice
        except ImportError:
            DigitalOutputDevice = None

        if DigitalOutputDevice is not None:
            # The context manager closes the device (frees the pin) on exit,
            # so the next press can claim the same pin again.
            with DigitalOutputDevice(pin, initial_value=False) as line:
                logger.info(f"Pressing power button for {duration} seconds")
                line.on()
                try:
                    time.sleep(duration)
                finally:
                    line.off()
            logger.info("Power button press complete")
        else:
            # Fallback to RPi.GPIO
            GPIO.setmode(GPIO.BCM)
            try:
                GPIO.setup(pin, GPIO.OUT, initial=GPIO.LOW)
                logger.info(f"Pressing power button for {duration} seconds")
                GPIO.output(pin, GPIO.HIGH)
                try:
                    time.sleep(duration)
                finally:
                    GPIO.output(pin, GPIO.LOW)
                logger.info("Power button press complete")
            finally:
                # Always release the pin, even when the press failed midway.
                GPIO.cleanup()
    except Exception as e:
        logger.error(f"Error pressing power button: {e}")
        raise
def ping_host(ip_address):
    """Ping a host once and report whether it answered.

    Args:
        ip_address: Address (or hostname) to ping.

    Returns:
        True if a reply was received within the 5 second timeout, False on
        timeout, on a ping error, or if the ping call raised.
    """
    try:
        response = ping3.ping(ip_address, timeout=5)
    except Exception as e:
        logger.debug(f"Ping to {ip_address} failed: {e}")
        return False
    # ping3.ping() yields the round-trip delay on success, None on timeout
    # and False on errors (e.g. unresolvable host). Compare against both
    # failure values explicitly: a delay of 0.0 is falsy but still a reply.
    return response is not None and response is not False
def check_host_alive():
    """Report whether the monitored host itself answers ping.

    The gateway is pinged only when the host is silent. Its result does not
    change the verdict; it only tells the logs apart between "host is down"
    (gateway reachable) and "nothing is reachable".

    Returns:
        True only if the host replied; False otherwise.
    """
    host_ip = CONFIG["host_ip"]
    gateway_ip = CONFIG["gateway_ip"]

    if ping_host(host_ip):
        logger.debug(f"Host {host_ip} is alive")
        return True

    logger.debug(f"Host {host_ip} not responding, trying gateway fallback")
    if ping_host(gateway_ip):
        # A reachable gateway shows the network path from here works, so the
        # silence is the host's own. Reporting it as alive would mean a hung
        # host on a healthy network is never reset.
        logger.warning(f"Host {host_ip} unreachable while gateway {gateway_ip} is alive")
        return False

    # NOTE(review): with the gateway down too, the fault may be on the
    # monitor's side of the network rather than the host - confirm whether
    # a reset is wanted in this situation.
    logger.warning(f"Both host ({host_ip}) and gateway ({gateway_ip}) unreachable")
    return False
def perform_reset():
    """Hard-reset the host: long press, wait, short press, then record it.

    The sequence is a long press (power down), a pause of
    ``wait_before_reboot`` seconds, and a short press (power on). A line with
    the current timestamp is then appended to STATE_FILE.

    Raises:
        Exception: Any error from the button presses is logged and re-raised.
            A failure to write STATE_FILE is only logged, because by then the
            reset itself has already been carried out.
    """
    banner = "=" * 60
    logger.warning(banner)
    logger.warning("INITIATING HARD RESET SEQUENCE")
    logger.warning(banner)
    try:
        # Power down
        logger.info("Step 1: Long press to power down")
        press_power_button(CONFIG["long_press_duration"])
        # Wait for shutdown to complete
        logger.info(f"Step 2: Waiting {CONFIG['wait_before_reboot']} seconds for cool-down")
        time.sleep(CONFIG["wait_before_reboot"])
        # Power on
        logger.info("Step 3: Short press to power on")
        press_power_button(CONFIG["short_press_duration"])
        logger.info("Step 4: Reset sequence complete")
        logger.warning(banner)
    except Exception as e:
        logger.error(f"Error during reset sequence: {e}")
        raise

    # Record in state file. This is bookkeeping only: if it raised, callers
    # would treat a completed reset as failed and could power-cycle the host
    # again while it is still booting.
    try:
        with open(STATE_FILE, "a", encoding="utf-8") as f:
            f.write(f"{datetime.now().isoformat()}: Reset performed\n")
    except OSError as e:
        logger.error(f"Could not record reset in {STATE_FILE}: {e}")
def main():
    """Run the monitoring loop until interrupted.

    Every ``ping_interval`` seconds the host is checked. Consecutive failed
    checks are counted, and once ``failures * ping_interval`` reaches
    ``downtime_threshold`` minutes a hard reset is performed and the counter
    starts over. Unexpected errors are logged and the loop carries on.
    Ctrl-C at any point, including while sleeping, exits with status 0.
    """
    logger.info("=" * 60)
    logger.info("PiKVM Auto-Restart Monitor Started")
    logger.info(f"Configuration: Host={CONFIG['host_ip']}, Gateway={CONFIG['gateway_ip']}")
    logger.info(f"Ping interval={CONFIG['ping_interval']}s, Threshold={CONFIG['downtime_threshold']}min")
    logger.info("=" * 60)

    interval = CONFIG["ping_interval"]
    threshold_seconds = CONFIG["downtime_threshold"] * 60
    # Downtime is estimated from the number of failed checks rather than
    # wall-clock timestamps, so a clock adjustment cannot trigger a reset.
    consecutive_failures = 0

    try:
        while True:
            try:
                if check_host_alive():
                    consecutive_failures = 0
                    logger.info("✓ Host is alive")
                else:
                    consecutive_failures += 1
                    downtime_seconds = consecutive_failures * interval
                    downtime_minutes = downtime_seconds / 60
                    logger.warning(
                        f"✗ Host unreachable ({consecutive_failures} attempts, {downtime_minutes:.1f} min downtime)"
                    )
                    # Check if downtime threshold exceeded
                    if downtime_seconds >= threshold_seconds:
                        logger.error(
                            f"Downtime threshold exceeded ({downtime_minutes:.1f} min). Initiating reset."
                        )
                        perform_reset()
                        consecutive_failures = 0  # Reset counter after reboot
            except Exception as e:
                # Keep the monitor alive; the failure counter is left as-is so
                # a failed reset is retried on the next unsuccessful check.
                logger.error(f"Unexpected error: {e}", exc_info=True)
            # Single sleep for both the normal and the error path; it sits
            # inside the outer try so Ctrl-C here is handled cleanly too.
            time.sleep(interval)
    except KeyboardInterrupt:
        logger.info("Monitor stopped by user")
        sys.exit(0)
# Start the monitor only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()