#!/usr/bin/env python3
"""
PiKVM Auto-Restart Monitor

Monitors host connectivity and performs hard reset if downtime exceeds threshold.
"""

import logging
import os
import sys
import time
from datetime import datetime, timedelta
from pathlib import Path

import ping3

try:
    # DigitalOutputDevice is what actually drives the line; Button is kept
    # importable only because the original module imported it.
    from gpiozero import Button, DigitalOutputDevice  # noqa: F401
except ImportError:
    # Fallback if gpiozero not available
    DigitalOutputDevice = None
    import RPi.GPIO as GPIO

# Configuration (every value can be overridden through the environment)
CONFIG = {
    "host_ip": os.getenv("HOST_IP", "192.168.1.10"),
    "gateway_ip": os.getenv("GATEWAY_IP", "192.168.1.1"),
    "ping_interval": int(os.getenv("PING_INTERVAL", 180)),  # 3 minutes
    "downtime_threshold": int(os.getenv("DOWNTIME_THRESHOLD", 15)),  # 15 minutes
    "power_button_gpio": int(os.getenv("POWER_BUTTON_GPIO", 17)),
    "long_press_duration": float(os.getenv("LONG_PRESS_DURATION", 5)),  # 5 seconds to power down
    "short_press_duration": float(os.getenv("SHORT_PRESS_DURATION", 1)),  # 1 second to power on
    "wait_before_reboot": int(os.getenv("WAIT_BEFORE_REBOOT", 90)),  # 90 seconds
    "log_file": os.getenv("LOG_FILE", "/var/log/pikvm-monitor.log"),
}


def _ensure_parent_dir(file_path):
    """Create the directory that will contain *file_path*, if it has one."""
    parent = os.path.dirname(file_path)
    # A bare filename has an empty dirname, and os.makedirs("") raises
    # FileNotFoundError, so only create the directory when there is one.
    if parent:
        os.makedirs(parent, exist_ok=True)


# Setup logging
_ensure_parent_dir(CONFIG["log_file"])
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s",
    handlers=[
        logging.FileHandler(CONFIG["log_file"]),
        logging.StreamHandler(),
    ],
)
logger = logging.getLogger(__name__)

# State file to track reboots
STATE_FILE = "/var/lib/pikvm-monitor/state.txt"
_ensure_parent_dir(STATE_FILE)


def press_power_button(duration: float) -> None:
    """Simulate a power button press via GPIO.

    Drives the configured GPIO line high for *duration* seconds, then low,
    and releases the pin afterwards. gpiozero is used when it was importable;
    otherwise RPi.GPIO is used.

    Args:
        duration: How long, in seconds, to hold the line high.

    Raises:
        Exception: Any error from the GPIO backend is logged and re-raised.
    """
    pin = CONFIG["power_button_gpio"]
    try:
        if DigitalOutputDevice is not None:
            # An output device is required to drive the line; the context
            # manager closes the device (releasing the pin) even on error, so
            # the next press can claim the same pin again.
            with DigitalOutputDevice(pin, initial_value=False) as line:
                logger.info(f"Pressing power button for {duration} seconds")
                line.on()
                time.sleep(duration)
                line.off()
                logger.info("Power button press complete")
        else:
            # Fallback to RPi.GPIO
            try:
                GPIO.setmode(GPIO.BCM)
                GPIO.setup(pin, GPIO.OUT, initial=GPIO.LOW)
                logger.info(f"Pressing power button for {duration} seconds")
                GPIO.output(pin, GPIO.HIGH)
                time.sleep(duration)
                GPIO.output(pin, GPIO.LOW)
                logger.info("Power button press complete")
            finally:
                # Always release the pin, including when the press failed.
                GPIO.cleanup()
    except Exception as e:
        logger.error(f"Error pressing power button: {e}")
        raise


def ping_host(ip_address: str) -> bool:
    """Ping *ip_address* once and return True only if a reply was received.

    Args:
        ip_address: Address (or hostname) to ping.

    Returns:
        True when ping3 reports a round-trip delay; False on timeout, on a
        ping3-reported error, or when ping3 raises.
    """
    try:
        response = ping3.ping(ip_address, timeout=5)
    except Exception as e:
        logger.debug(f"Ping to {ip_address} failed: {e}")
        return False
    # ping3 returns None on timeout and False on errors such as an unknown or
    # unreachable host. Identity checks are used because a delay of 0.0 is a
    # valid (falsy) success value.
    return response is not None and response is not False


def check_host_alive() -> bool:
    """Check if host is alive, with fallback to gateway.

    Returns:
        True if the host answers a ping, or if the host does not answer but
        the gateway does; False when neither answers.
    """
    # NOTE(review): a reachable gateway makes this report the host as alive
    # even when the host itself does not answer, so a reset only happens when
    # BOTH are unreachable. Confirm this is the intended policy.
    if ping_host(CONFIG["host_ip"]):
        logger.debug(f"Host {CONFIG['host_ip']} is alive")
        return True

    logger.debug(f"Host {CONFIG['host_ip']} not responding, trying gateway fallback")
    if ping_host(CONFIG["gateway_ip"]):
        logger.debug(f"Gateway {CONFIG['gateway_ip']} is alive (host assumed up)")
        return True

    logger.warning(f"Both host ({CONFIG['host_ip']}) and gateway ({CONFIG['gateway_ip']}) unreachable")
    return False


def perform_reset() -> None:
    """Perform hard reset: long press power down, wait, short press power on.

    After the button sequence a line is appended to STATE_FILE. Failure to
    write that line is logged but not raised, so a completed reset is never
    reported to the caller as a failed one.

    Raises:
        Exception: Any error from the button presses is logged and re-raised.
    """
    logger.warning("=" * 60)
    logger.warning("INITIATING HARD RESET SEQUENCE")
    logger.warning("=" * 60)

    try:
        # Power down
        logger.info("Step 1: Long press to power down")
        press_power_button(CONFIG["long_press_duration"])

        # Wait for shutdown to complete
        logger.info(f"Step 2: Waiting {CONFIG['wait_before_reboot']} seconds for cool-down")
        time.sleep(CONFIG["wait_before_reboot"])

        # Power on
        logger.info("Step 3: Short press to power on")
        press_power_button(CONFIG["short_press_duration"])

        logger.info("Step 4: Reset sequence complete")
        logger.warning("=" * 60)
    except Exception as e:
        logger.error(f"Error during reset sequence: {e}")
        raise

    # Record in state file. This is bookkeeping only: if it raised, the caller
    # would keep its failure counter and hard-reset the host again on the very
    # next failed check.
    try:
        with open(STATE_FILE, "a", encoding="utf-8") as f:
            f.write(f"{datetime.now().isoformat()}: Reset performed\n")
    except OSError as e:
        logger.warning(f"Could not record reset in {STATE_FILE}: {e}")


def main() -> None:
    """Main monitoring loop.

    Every ``ping_interval`` seconds the host is checked. Consecutive failed
    checks are counted, and once ``failures * ping_interval`` reaches
    ``downtime_threshold`` minutes a hard reset is performed and the counter
    is cleared. Runs until interrupted with Ctrl-C, then exits with status 0.
    """
    logger.info("=" * 60)
    logger.info("PiKVM Auto-Restart Monitor Started")
    logger.info(f"Configuration: Host={CONFIG['host_ip']}, Gateway={CONFIG['gateway_ip']}")
    logger.info(f"Ping interval={CONFIG['ping_interval']}s, Threshold={CONFIG['downtime_threshold']}min")
    logger.info("=" * 60)

    consecutive_failures = 0
    threshold_seconds = CONFIG["downtime_threshold"] * 60

    # The KeyboardInterrupt handler wraps the whole loop so that Ctrl-C is
    # handled cleanly wherever it lands, including during the sleep.
    try:
        while True:
            try:
                if check_host_alive():
                    consecutive_failures = 0
                    logger.info("✓ Host is alive")
                else:
                    consecutive_failures += 1
                    downtime_seconds = consecutive_failures * CONFIG["ping_interval"]
                    downtime_minutes = downtime_seconds / 60
                    logger.warning(
                        f"✗ Host unreachable ({consecutive_failures} attempts, {downtime_minutes:.1f} min downtime)"
                    )

                    # Check if downtime threshold exceeded
                    if downtime_seconds >= threshold_seconds:
                        logger.error(
                            f"Downtime threshold exceeded ({downtime_minutes:.1f} min). Initiating reset."
                        )
                        perform_reset()
                        consecutive_failures = 0  # Reset counter after reboot
            except Exception as e:
                # Keep the monitor running; the next cycle retries.
                logger.error(f"Unexpected error: {e}", exc_info=True)

            time.sleep(CONFIG["ping_interval"])
    except KeyboardInterrupt:
        logger.info("Monitor stopped by user")
        sys.exit(0)


if __name__ == "__main__":
    main()