From 0e10f1f1354a60fae85ac10ce1150012fb69ba06 Mon Sep 17 00:00:00 2001 From: TylerCG <117808427+TylerCG@users.noreply.github.com> Date: Sun, 12 Apr 2026 13:03:45 -0400 Subject: [PATCH] first commit --- .env.example | 31 +++++++ .gitignore | 45 +++++++++ Dockerfile | 23 +++++ INSTALL_NATIVE.md | 139 ++++++++++++++++++++++++++++ PROJECT_GUIDE.md | 100 ++++++++++++++++++++ README.md | 222 +++++++++++++++++++++++++++++++++++++++++++++ docker-compose.yml | 33 +++++++ health_check.py | 122 +++++++++++++++++++++++++ monitor.py | 176 +++++++++++++++++++++++++++++++++++ requirements.txt | 3 + test_gpio.py | 121 ++++++++++++++++++++++++ 11 files changed, 1015 insertions(+) create mode 100644 .env.example create mode 100644 .gitignore create mode 100644 Dockerfile create mode 100644 INSTALL_NATIVE.md create mode 100644 PROJECT_GUIDE.md create mode 100644 README.md create mode 100644 docker-compose.yml create mode 100644 health_check.py create mode 100644 monitor.py create mode 100644 requirements.txt create mode 100644 test_gpio.py diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..34146a3 --- /dev/null +++ b/.env.example @@ -0,0 +1,31 @@ +# PiKVM Monitor Configuration + +# IP address of the host to monitor +HOST_IP=192.168.1.10 + +# Fallback gateway IP (for network-level connectivity check) +GATEWAY_IP=192.168.1.1 + +# Interval between pings (in seconds) - default: 180 (3 minutes) +PING_INTERVAL=180 + +# Downtime threshold before triggering reset (in minutes) - default: 15 +# 15 minutes = 5 failed pings at 3-minute intervals +DOWNTIME_THRESHOLD=15 + +# GPIO pin number for power button control - default: BCM pin 17 +# Check your PiKVM documentation for the correct GPIO pin +POWER_BUTTON_GPIO=17 + +# Duration of long press to power down (in seconds) - default: 5 +LONG_PRESS_DURATION=5 + +# Duration of short press to power on (in seconds) - default: 1 +SHORT_PRESS_DURATION=1 + +# Wait time between power down and power on (in seconds) - default: 90 (1.5 
min) +# Allows time for cooldown and system shutdown +WAIT_BEFORE_REBOOT=90 + +# Log file path (inside container at /var/log/pikvm-monitor.log) +LOG_FILE=/var/log/pikvm-monitor.log diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..3a6e22e --- /dev/null +++ b/.gitignore @@ -0,0 +1,45 @@ +# Environment variables +.env +!.env.example + +# Python +__pycache__/ +*.py[cod] +*$py.class +*.so +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg + +# Virtual environments +venv/ +ENV/ +env/ + +# IDE +.vscode/ +.idea/ +*.swp +*.swo +*~ +.DS_Store + +# Logs +logs/ +*.log + +# Docker +.dockerignore diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..faee56f --- /dev/null +++ b/Dockerfile @@ -0,0 +1,23 @@ +FROM python:3.11-bullseye + +WORKDIR /app + +# Install system dependencies for GPIO control +RUN apt-get update && apt-get install -y \ + python3-gpiozero \ + python3-pip \ + iputils-ping \ + && rm -rf /var/lib/apt/lists/* + +# Copy application +COPY monitor.py . +COPY requirements.txt . + +# Install Python dependencies +RUN pip install --no-cache-dir -r requirements.txt + +# Create directories for logs and state +RUN mkdir -p /var/log /var/lib/pikvm-monitor + +# Run the monitor +CMD ["python3", "monitor.py"] diff --git a/INSTALL_NATIVE.md b/INSTALL_NATIVE.md new file mode 100644 index 0000000..2a5cf1e --- /dev/null +++ b/INSTALL_NATIVE.md @@ -0,0 +1,139 @@ +# Alternative: Run without Docker + +If you prefer to run directly on PiKVM without Docker, follow these steps: + +## Installation + +### 1. Install Dependencies + +```bash +sudo apt-get update +sudo apt-get install -y python3 python3-pip python3-gpiozero python3-pip iputils-ping +``` + +### 2. Install Python Packages + +```bash +pip3 install --user ping3==4.0.1 gpiozero==2.0.1 RPi.GPIO==0.7.0 +``` + +### 3. 
Copy Files + +```bash +sudo mkdir -p /opt/pikvm-monitor +sudo cp monitor.py /opt/pikvm-monitor/ +sudo chown pikvm:pikvm /opt/pikvm-monitor/monitor.py +sudo chmod +x /opt/pikvm-monitor/monitor.py +``` + +## Option A: Run as systemd Service + +### 1. Create systemd service file + +```bash +sudo tee /etc/systemd/system/pikvm-monitor.service << EOF +[Unit] +Description=PiKVM Auto-Restart Monitor +After=network-online.target +Wants=network-online.target + +[Service] +Type=simple +User=pikvm +WorkingDirectory=/opt/pikvm-monitor +ExecStart=/usr/bin/python3 /opt/pikvm-monitor/monitor.py +Restart=always +RestartSec=10 +StandardOutput=journal +StandardError=journal + +# GPIO requires this +ExecStartPost=/bin/sh -c 'echo pikvm | sudo -S usermod -a -G gpio,spi pikvm' + +# Environment variables +Environment="HOST_IP=192.168.1.10" +Environment="GATEWAY_IP=192.168.1.1" +Environment="POWER_BUTTON_GPIO=17" +Environment="PING_INTERVAL=180" +Environment="DOWNTIME_THRESHOLD=15" + +[Install] +WantedBy=multi-user.target +EOF +``` + +### 2. Enable and Start + +```bash +sudo systemctl daemon-reload +sudo systemctl enable pikvm-monitor +sudo systemctl start pikvm-monitor +``` + +### 3. Check Status + +```bash +sudo systemctl status pikvm-monitor +sudo journalctl -u pikvm-monitor -f +``` + +## Option B: Run in Screen/Tmux + +```bash +screen -S pikvm-monitor python3 /opt/pikvm-monitor/monitor.py +``` + +## Option C: Cron-based Task + +For lightweight monitoring, you could create a simple cron check, but systemd service is recommended for continuous monitoring. + +## GPIO Permissions + +Make sure the `pikvm` user has GPIO access: + +```bash +sudo usermod -a -G gpio pikvm +sudo usermod -a -G spi pikvm +``` + +Then log out and log back in. 
+ +## Logs + +Logs go to `/var/log/pikvm-monitor.log`: + +```bash +tail -f /var/log/pikvm-monitor.log +``` + +## Troubleshooting + +### Permission denied on GPIO + +```bash +sudo usermod -a -G gpio pikvm +# Log out and back in +``` + +### Module not found + +```bash +pip3 install --user ping3 gpiozero RPi.GPIO +``` + +### Test GPIO locally + +```bash +python3 -c "from gpiozero import Button; print('GPIO OK')" +``` + +## Docker vs Native + +| | Docker | Native | +|---|--------|--------| +| Isolation | ✓ | ✗ | +| Portability | ✓ | ✗ | +| Resource use | Higher | Lower | +| Setup time | Faster | More steps | +| Debugging | docker logs | journalctl | +| Recommended | ✓ | Used if Docker unavailable | diff --git a/PROJECT_GUIDE.md b/PROJECT_GUIDE.md new file mode 100644 index 0000000..525b873 --- /dev/null +++ b/PROJECT_GUIDE.md @@ -0,0 +1,100 @@ +# Project Structure + +``` +plex-restart/ +├── monitor.py # Main monitoring agent +├── Dockerfile # Container definition +├── docker-compose.yml # Service orchestration +├── requirements.txt # Python dependencies +├── test_gpio.py # GPIO testing utility +├── health_check.py # Service health monitor +├── .env.example # Configuration template +├── INSTALL_NATIVE.md # Native installation guide +├── README.md # Complete documentation +└── .gitignore # Git ignore rules +``` + +## Quick Reference + +### Deploy (Docker Compose) +```bash +cp .env.example .env +# Edit .env with your host IP and GPIO pin +docker-compose up -d +docker-compose logs -f +``` + +### Deploy (Native) +See `INSTALL_NATIVE.md` for systemd service or manual installation. 
+ +### Test GPIO +```bash +POWER_BUTTON_GPIO=17 python3 test_gpio.py +``` + +### Check Health +```bash +python3 health_check.py +``` + +### View Logs +```bash +# Docker +docker-compose logs -f pikvm-monitor + +# Native/systemd +journalctl -u pikvm-monitor -f +tail -f /var/log/pikvm-monitor.log +``` + +## Key Parameters + +- **Ping Interval**: 180 seconds (3 minutes) +- **Downtime Threshold**: 15 minutes (5 failed pings) +- **Long Press**: 5 seconds (power down) +- **Wait**: 90 seconds (cool-down) +- **Short Press**: 1 second (power on) + +Adjust in `.env` as needed for faster/slower recovery. + +## Files Overview + +| File | Purpose | +|------|---------| +| `monitor.py` | Main monitoring loop with GPIO control | +| `Dockerfile` | Builds container image | +| `docker-compose.yml` | Defines service with volume/env mapping | +| `requirements.txt` | Python package versions | +| `test_gpio.py` | Tests GPIO pin configuration | +| `health_check.py` | Verifies monitor is running | +| `.env.example` | Configuration template | +| `INSTALL_NATIVE.md` | Systemd/manual setup | + +## Architecture + +``` +PiKVM (Runs Docker Compose) + └── pikvm-monitor Container + ├── Pings HOST_IP every 3 min + ├── Falls back to GATEWAY_IP + ├── Tracks consecutive failures + ├── After 15 min downtime: + │ ├── Long press GPIO (5 sec) → Power down + │ ├── Wait 90 seconds + │ └── Short press GPIO (1 sec) → Power on + └── Logs to /var/log/pikvm-monitor.log +``` + +## Next Steps + +1. **Configure**: Edit `.env` with your IPs and GPIO pin +2. **Test GPIO**: Run `test_gpio.py` to verify pin works +3. **Deploy**: Use `docker-compose up -d` +4. **Monitor**: Check logs with `docker-compose logs -f` +5. 
**Verify**: Run `health_check.py` periodically + +## Support + +- PiKVM Docs: https://docs.pikvm.org/ +- gpiozero: https://gpiozero.readthedocs.io/ +- Docker Compose: https://docs.docker.com/compose/ diff --git a/README.md b/README.md new file mode 100644 index 0000000..d7cf7db --- /dev/null +++ b/README.md @@ -0,0 +1,222 @@ +# PiKVM Auto-Restart Monitor + +Automatic monitoring and recovery agent for PiKVM that watches a connected host and performs a hard reset if the system becomes unresponsive due to thermal throttling or other issues. + +## Features + +- **Continuous Monitoring**: Pings the host every 3 minutes (configurable) +- **Fallback Detection**: Tries host IP first, falls back to gateway for network-level connectivity check +- **Auto Recovery**: Performs hard reset after 15 minutes of downtime (configurable) +- **GPIO Control**: Simulates power button presses for graceful shutdown followed by restart +- **Cool-Down Period**: Waits 90 seconds between power down and restart to allow system cooling +- **Docker Native**: Runs as a Docker Compose service on PiKVM +- **Comprehensive Logging**: Tracks all events and reboot history + +## Hardware Requirements + +- PiKVM with Raspberry Pi (4 or better recommended) +- GPIO pins configured for power button control + - Typically BCM GPIO 17 for power button + - Check your PiKVM documentation to confirm +- Network access to the host and gateway + +## Quick Start + +### 1. Clone or Copy to PiKVM + +```bash +# Replace YOUR_REPO_URL with the URL of this repository +git clone YOUR_REPO_URL /home/pikvm/plex-restart +cd /home/pikvm/plex-restart +``` + +### 2. Configure Environment + +Copy the example config and update with your values: + +```bash +cp .env.example .env +``` + +Edit `.env` with your host details: + +```env +HOST_IP=192.168.1.10 # Your host's IP address +GATEWAY_IP=192.168.1.1 # Your network gateway IP +POWER_BUTTON_GPIO=17 # GPIO pin (confirm with PiKVM docs) +``` + +### 3. 
Deploy with Docker Compose + +```bash +docker-compose up -d +``` + +Verify it's running: + +```bash +docker-compose logs -f pikvm-monitor +``` + +## Configuration Options + +All settings can be configured via environment variables in `.env`: + +| Variable | Default | Description | +|----------|---------|-------------| +| `HOST_IP` | `192.168.1.10` | IP address of host to monitor | +| `GATEWAY_IP` | `192.168.1.1` | Fallback gateway for connectivity check | +| `PING_INTERVAL` | `180` | Seconds between pings (3 min) | +| `DOWNTIME_THRESHOLD` | `15` | Minutes of downtime before reset | +| `POWER_BUTTON_GPIO` | `17` | BCM GPIO pin for power button | +| `LONG_PRESS_DURATION` | `5` | Seconds to hold for power down | +| `SHORT_PRESS_DURATION` | `1` | Seconds to hold for power on | +| `WAIT_BEFORE_REBOOT` | `90` | Seconds to wait between power down/up | + +### Example: Faster Recovery + +To recover in 9 minutes instead of 15: + +```env +PING_INTERVAL=180 # 3 minutes +DOWNTIME_THRESHOLD=9 # 9 minutes +``` + +This triggers reset after 3 failed pings (9 minutes total). 
+ +## Monitoring & Logs + +### View Live Logs + +```bash +docker-compose logs -f pikvm-monitor +``` + +### Inside Container + +```bash +docker exec pikvm-monitor tail -f /var/log/pikvm-monitor.log +``` + +### Reset History + +```bash +docker exec pikvm-monitor cat /var/lib/pikvm-monitor/state.txt +``` + +## Manual Control + +### Stop Monitor + +```bash +docker-compose down +``` + +### Restart Monitor + +```bash +docker-compose restart pikvm-monitor +``` + +### View Status + +```bash +docker-compose ps +``` + +## Troubleshooting + +### Monitor Not Starting + +Check logs: +```bash +docker-compose logs pikvm-monitor +``` + +Common issues: +- GPIO pins in use by another service +- Incorrect GPIO pin number +- Network connectivity issues + +### Not Detecting Host Down + +Verify connectivity manually (substitute the `HOST_IP` and `GATEWAY_IP` values from your `.env`): +```bash +ping YOUR_HOST_IP +ping YOUR_GATEWAY_IP +``` + +Check: +- Host IP is correct in `.env` +- Network can reach both IPs +- PiKVM has network access + +### Power Button Not Working + +1. Verify GPIO pin number in PiKVM documentation +2. Update `POWER_BUTTON_GPIO` in `.env` +3. Test GPIO access: + ```bash + docker exec pikvm-monitor python3 -c "from gpiozero import Button; b = Button(17); print('GPIO working')" + ``` + +## Architecture + +The monitor runs as a single long-running process: + +``` +Startup + ↓ +Load Configuration + ↓ +Every 180 seconds: + ├─ Ping HOST_IP + │ └─ If fails, ping GATEWAY_IP (fallback) + ├─ If alive: Reset counter + └─ If down: Increment counter + └─ If counter × PING_INTERVAL ≥ DOWNTIME_THRESHOLD: + ├─ Long press power button (5 sec) + ├─ Wait 90 seconds + ├─ Short press power button (1 sec) + └─ Reset counter + ↓ +Repeat +``` + +## Performance Considerations + +- **CPU**: Minimal (~5-10% during checks) +- **Memory**: ~50-80MB +- **Network**: Single ICMP ping every 3 minutes +- **GPIO**: Brief pulses only during reset + +Safe to run alongside other PiKVM services. 
+
+## Development
+
+### Local Testing (without GPIO)
+
+```bash
+# Mock GPIO by catching exceptions during testing
+python3 monitor.py
+```
+
+### Building Custom Image
+
+```bash
+docker build -t pikvm-monitor:latest .
+```
+
+## License
+
+MIT
+
+## Support
+
+For issues with PiKVM GPIO access:
+- [PiKVM Documentation](https://docs.pikvm.org/)
+- [gpiozero Library](https://gpiozero.readthedocs.io/)
+
+For issues with this monitor:
+- Check logs: `docker-compose logs`
+- Verify `.env` configuration
+- Test GPIO pin access manually
diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644
index 0000000..8990377
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,33 @@
+version: '3.8'
+
+services:
+  pikvm-monitor:
+    build: .
+    image: pikvm-monitor:latest
+    container_name: pikvm-monitor
+    restart: always
+    privileged: true  # Required for GPIO access
+    volumes:
+      - /var/log/pikvm-monitor:/var/log
+      - /var/lib/pikvm-monitor:/var/lib/pikvm-monitor
+    environment:
+      - HOST_IP=${HOST_IP:-192.168.1.10}
+      - GATEWAY_IP=${GATEWAY_IP:-192.168.1.1}
+      - PING_INTERVAL=${PING_INTERVAL:-180}
+      - DOWNTIME_THRESHOLD=${DOWNTIME_THRESHOLD:-15}
+      - POWER_BUTTON_GPIO=${POWER_BUTTON_GPIO:-17}
+      - LONG_PRESS_DURATION=${LONG_PRESS_DURATION:-5}
+      - SHORT_PRESS_DURATION=${SHORT_PRESS_DURATION:-1}
+      - WAIT_BEFORE_REBOOT=${WAIT_BEFORE_REBOOT:-90}
+      - LOG_FILE=/var/log/pikvm-monitor.log
+    logging:
+      driver: "json-file"
+      options:
+        max-size: "10m"
+        max-file: "3"
+    networks:
+      - default
+
+networks:
+  default:
+    driver: bridge
diff --git a/health_check.py b/health_check.py
new file mode 100644
index 0000000..3a54a7e
--- /dev/null
+++ b/health_check.py
@@ -0,0 +1,122 @@
+#!/usr/bin/env python3
+"""
+Health Check Script - Monitor the monitoring service
+Can be used with cron or external monitoring tools
+
+Exit status: 0 when every check passes, 1 when exactly one check fails,
+2 when two or more checks fail.
+"""
+
+import os
+import socket
+import subprocess
+import sys
+from pathlib import Path
+from datetime import datetime, timedelta
+
+# Overridable through the LOG_FILE environment variable. The compose file
+# mounts the host directory /var/log/pikvm-monitor at the container's
+# /var/log, so on a Docker host the log is not at the default path below.
+LOG_FILE = os.getenv("LOG_FILE", "/var/log/pikvm-monitor.log")
+STATE_FILE = "/var/lib/pikvm-monitor/state.txt"
+
+# A log file older than this is reported as stale.
+MAX_LOG_AGE = timedelta(minutes=10)
+
+
+def check_log_recent():
+    """Check if monitor has logged something recently (within MAX_LOG_AGE).
+
+    Returns:
+        bool: True when LOG_FILE exists and was modified at most MAX_LOG_AGE
+        ago; False when it is missing, stale, or cannot be inspected.
+    """
+    log_path = Path(LOG_FILE)
+    if not log_path.exists():
+        print(f"✗ Log file not found: {LOG_FILE}")
+        return False
+
+    try:
+        last_update = datetime.fromtimestamp(log_path.stat().st_mtime)
+    except (OSError, OverflowError, ValueError) as e:
+        print(f"✗ Error checking log: {e}")
+        return False
+
+    age = datetime.now() - last_update
+    if age > MAX_LOG_AGE:
+        print(f"✗ Log file not updated recently (last: {age.total_seconds():.0f}s ago)")
+        return False
+
+    print(f"✓ Log file updated {age.total_seconds():.0f}s ago")
+    return True
+
+
+def check_container_running():
+    """Check if Docker container is running.
+
+    Returns:
+        bool: True when a container whose name contains "pikvm-monitor" is
+        running, or when Docker cannot be queried at all (the check is
+        best-effort so native installs do not fail it); False when the
+        container exists in another state or does not exist.
+    """
+    try:
+        # Optional dependency, imported lazily so the script still runs on
+        # hosts without the Docker SDK.
+        import docker
+        client = docker.from_env()
+        # all=True: the default listing holds running containers only, which
+        # would report a stopped container as "not found" instead of showing
+        # its actual status.
+        for container in client.containers.list(all=True):
+            if "pikvm-monitor" not in container.name:
+                continue
+            if container.status == "running":
+                print(f"✓ Container {container.name} is running")
+                return True
+            print(f"✗ Container {container.name} is {container.status}")
+            return False
+        print("✗ Container pikvm-monitor not found")
+        return False
+    except Exception as e:
+        print(f"⚠ Could not check container (not using Docker?): {e}")
+        return True  # Don't fail if not using Docker
+
+
+def check_process_running():
+    """Check if Python monitor process is running.
+
+    Returns:
+        bool: True when `pgrep -f monitor.py` finds a match, or when pgrep
+        itself cannot be executed (best-effort); False when nothing matches.
+    """
+    try:
+        result = subprocess.run(["pgrep", "-f", "monitor.py"], capture_output=True)
+    except OSError as e:
+        print(f"⚠ Could not check process: {e}")
+        return True
+
+    if result.returncode == 0:
+        print("✓ Monitor process is running")
+        return True
+
+    print("✗ Monitor process not running")
+    return False
+
+
+def check_network():
+    """Check basic network connectivity.
+
+    Opens (and immediately closes) a TCP connection to 8.8.8.8:53.
+
+    Returns:
+        bool: True when the connection succeeds within 3 seconds.
+    """
+    try:
+        # The context manager closes the probe socket; the original leaked it.
+        with socket.create_connection(("8.8.8.8", 53), timeout=3):
+            pass
+    except OSError:
+        # Covers timeouts and refused/unreachable errors without swallowing
+        # KeyboardInterrupt/SystemExit the way a bare except would.
+        print("✗ Network connectivity issue")
+        return False
+
+    print("✓ Network connectivity OK")
+    return True
+
+
+def main():
+    """Run every health check, print a summary and exit with 0, 1 or 2."""
+    print("=" * 60)
+    print("PiKVM Monitor Health Check")
+    print("=" * 60)
+    print()
+
+    checks = [
+        ("Process Running", check_process_running),
+        ("Container Running", check_container_running),
+        ("Log File Recent", check_log_recent),
+        ("Network Connectivity", check_network),
+    ]
+
+    results = []
+    for name, check_func in checks:
+        print(f"Checking: {name}...")
+        try:
+            results.append(check_func())
+        except Exception as e:
+            # One misbehaving check must not abort the remaining ones.
+            print(f"✗ Check failed: {e}")
+            results.append(False)
+        print()
+
+    passed = sum(results)
+    total = len(results)
+
+    print("=" * 60)
+    print(f"Results: {passed}/{total} checks passed")
+
+    if passed == total:
+        print("✓ Monitor is healthy")
+        sys.exit(0)
+    elif passed >= total - 1:
+        print("⚠ Monitor has minor issues")
+        sys.exit(1)
+    else:
+        print("✗ Monitor has critical issues")
+        sys.exit(2)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/monitor.py b/monitor.py
new file mode 100644
index 0000000..7e307e0
--- /dev/null
+++ b/monitor.py
@@ -0,0 +1,176 @@
+#!/usr/bin/env python3
+"""
+PiKVM Auto-Restart Monitor
+Monitors host connectivity and performs hard reset if downtime exceeds threshold.
+"""
+
+import ping3
+import time
+import logging
+import os
+import sys
+from datetime import datetime, timedelta
+from pathlib import Path
+
+try:
+    # The power-button line has to be *driven*, so an output device is used.
+    # gpiozero's Button is an input device.
+    from gpiozero import DigitalOutputDevice
+    GPIOZERO_AVAILABLE = True
+except ImportError:
+    # Fallback if gpiozero not available
+    import RPi.GPIO as GPIO
+    GPIOZERO_AVAILABLE = False
+
+# Configuration
+CONFIG = {
+    "host_ip": os.getenv("HOST_IP", "192.168.1.10"),
+    "gateway_ip": os.getenv("GATEWAY_IP", "192.168.1.1"),
+    "ping_interval": int(os.getenv("PING_INTERVAL", 180)),  # 3 minutes
+    "downtime_threshold": int(os.getenv("DOWNTIME_THRESHOLD", 15)),  # 15 minutes
+    "power_button_gpio": int(os.getenv("POWER_BUTTON_GPIO", 17)),
+    "long_press_duration": float(os.getenv("LONG_PRESS_DURATION", 5)),  # 5 seconds to power down
+    "short_press_duration": float(os.getenv("SHORT_PRESS_DURATION", 1)),  # 1 second to power on
+    "wait_before_reboot": int(os.getenv("WAIT_BEFORE_REBOOT", 90)),  # 90 seconds
+    "log_file": os.getenv("LOG_FILE", "/var/log/pikvm-monitor.log"),
+}
+
+# Setup logging
+# Path.parent is "." for a bare file name, whereas os.path.dirname() returns ""
+# and os.makedirs("") raises FileNotFoundError.
+Path(CONFIG["log_file"]).parent.mkdir(parents=True, exist_ok=True)
+logging.basicConfig(
+    level=logging.INFO,
+    format="%(asctime)s - %(levelname)s - %(message)s",
+    handlers=[
+        logging.FileHandler(CONFIG["log_file"]),
+        logging.StreamHandler(),
+    ],
+)
+logger = logging.getLogger(__name__)
+
+# State file to track reboots
+STATE_FILE = "/var/lib/pikvm-monitor/state.txt"
+os.makedirs(os.path.dirname(STATE_FILE), exist_ok=True)
+
+
+def press_power_button(duration):
+    """Simulate power button press via GPIO.
+
+    Drives the configured BCM pin high for `duration` seconds and then low
+    again, using gpiozero when it was importable and RPi.GPIO otherwise. The
+    pin is released afterwards even if the press is interrupted.
+
+    Args:
+        duration: Seconds to hold the pin high.
+
+    Raises:
+        Exception: Any GPIO error is logged and re-raised.
+    """
+    pin = CONFIG["power_button_gpio"]
+    try:
+        if GPIOZERO_AVAILABLE:
+            line = DigitalOutputDevice(pin, initial_value=False)
+            try:
+                logger.info(f"Pressing power button for {duration} seconds")
+                line.on()
+                time.sleep(duration)
+                line.off()
+            finally:
+                # Explicit close so the next press can claim the pin again.
+                line.close()
+        else:
+            # Fallback to RPi.GPIO
+            GPIO.setmode(GPIO.BCM)
+            GPIO.setup(pin, GPIO.OUT, initial=GPIO.LOW)
+            try:
+                logger.info(f"Pressing power button for {duration} seconds")
+                GPIO.output(pin, GPIO.HIGH)
+                time.sleep(duration)
+                GPIO.output(pin, GPIO.LOW)
+            finally:
+                GPIO.cleanup(pin)
+        logger.info("Power button press complete")
+    except Exception as e:
+        logger.error(f"Error pressing power button: {e}")
+        raise
+
+
+def ping_host(ip_address):
+    """Ping host and return True if alive.
+
+    Args:
+        ip_address: Address handed to ping3.ping().
+
+    Returns:
+        bool: True only when ping3 returned a round-trip delay.
+    """
+    try:
+        response = ping3.ping(ip_address, timeout=5)
+    except Exception as e:
+        logger.debug(f"Ping to {ip_address} failed: {e}")
+        return False
+    # ping3 returns None on timeout and False on errors such as an unknown or
+    # unreachable host. Identity checks are required: a delay of 0.0 compares
+    # equal to False but is a successful reply.
+    return response is not None and response is not False
+
+
+def check_host_alive():
+    """Check if host is alive; the gateway ping only qualifies the failure.
+
+    Returns:
+        bool: True when the host answers a ping, False otherwise. When the
+        host is silent the gateway is pinged as well so the log shows whether
+        the LAN itself is still reachable.
+    """
+    if ping_host(CONFIG["host_ip"]):
+        logger.debug(f"Host {CONFIG['host_ip']} is alive")
+        return True
+
+    logger.debug(f"Host {CONFIG['host_ip']} not responding, trying gateway fallback")
+    if ping_host(CONFIG["gateway_ip"]):
+        # A reachable gateway says nothing about the host: treating it as
+        # "host up" would mean a hung host on a healthy LAN is never reset.
+        logger.warning(
+            f"Host ({CONFIG['host_ip']}) unreachable while gateway ({CONFIG['gateway_ip']}) responds"
+        )
+        return False
+
+    # NOTE(review): both targets silent may also mean this monitor lost its own
+    # network link; confirm whether a reset is wanted in that situation.
+    logger.warning(f"Both host ({CONFIG['host_ip']}) and gateway ({CONFIG['gateway_ip']}) unreachable")
+    return False
+
+
+def perform_reset():
+    """Perform hard reset: long press power down, wait, short press power on.
+
+    On success a timestamped line is appended to STATE_FILE.
+
+    Raises:
+        Exception: Any error during the sequence is logged and re-raised.
+    """
+    logger.warning("=" * 60)
+    logger.warning("INITIATING HARD RESET SEQUENCE")
+    logger.warning("=" * 60)
+
+    try:
+        # Power down
+        logger.info("Step 1: Long press to power down")
+        press_power_button(CONFIG["long_press_duration"])
+
+        # Wait for shutdown to complete
+        logger.info(f"Step 2: Waiting {CONFIG['wait_before_reboot']} seconds for cool-down")
+        time.sleep(CONFIG["wait_before_reboot"])
+
+        # Power on
+        logger.info("Step 3: Short press to power on")
+        press_power_button(CONFIG["short_press_duration"])
+
+        logger.info("Step 4: Reset sequence complete")
+        logger.warning("=" * 60)
+
+        # Record in state file
+        with open(STATE_FILE, "a", encoding="utf-8") as f:
+            f.write(f"{datetime.now().isoformat()}: Reset performed\n")
+
+    except Exception as e:
+        logger.error(f"Error during reset sequence: {e}")
+        raise
+
+
+def main():
+    """Main monitoring loop.
+
+    Checks the host every `ping_interval` seconds. Each failed check counts as
+    one interval of downtime; once the accumulated downtime reaches
+    `downtime_threshold` minutes a reset is performed and the counter restarts.
+    Runs until interrupted with Ctrl-C.
+    """
+    logger.info("=" * 60)
+    logger.info("PiKVM Auto-Restart Monitor Started")
+    logger.info(f"Configuration: Host={CONFIG['host_ip']}, Gateway={CONFIG['gateway_ip']}")
+    logger.info(f"Ping interval={CONFIG['ping_interval']}s, Threshold={CONFIG['downtime_threshold']}min")
+    logger.info("=" * 60)
+
+    consecutive_failures = 0
+
+    while True:
+        try:
+            if check_host_alive():
+                consecutive_failures = 0
+                logger.info("✓ Host is alive")
+            else:
+                consecutive_failures += 1
+                downtime_seconds = consecutive_failures * CONFIG["ping_interval"]
+                downtime_minutes = downtime_seconds / 60
+                logger.warning(
+                    f"✗ Host unreachable ({consecutive_failures} attempts, {downtime_minutes:.1f} min downtime)"
+                )
+
+                # Check if downtime threshold exceeded
+                if downtime_seconds >= CONFIG["downtime_threshold"] * 60:
+                    logger.error(
+                        f"Downtime threshold exceeded ({downtime_minutes:.1f} min). Initiating reset."
+                    )
+                    perform_reset()
+                    consecutive_failures = 0  # Reset counter after reboot
+
+            time.sleep(CONFIG["ping_interval"])
+
+        except KeyboardInterrupt:
+            logger.info("Monitor stopped by user")
+            sys.exit(0)
+        except Exception as e:
+            # Keep the monitor alive; the failure counter is left untouched so
+            # a failed reset is retried on the next unreachable check.
+            logger.error(f"Unexpected error: {e}", exc_info=True)
+            time.sleep(CONFIG["ping_interval"])
+
+
+if __name__ == "__main__":
+    main()
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..67f4e65
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,3 @@
+ping3==4.0.1
+gpiozero==2.0.1
+RPi.GPIO==0.7.0
diff --git a/test_gpio.py b/test_gpio.py
new file mode 100644
index 0000000..8f16499
--- /dev/null
+++ b/test_gpio.py
@@ -0,0 +1,121 @@
+#!/usr/bin/env python3
+"""
+GPIO Test Utility - Verify power button GPIO configuration
+"""
+
+import sys
+import time
+import os
+
+# Each backend is probed independently so the RPi.GPIO fallback is usable
+# even when gpiozero imported fine but its pin test failed.
+try:
+    from gpiozero import DigitalOutputDevice
+    GPIOZERO_AVAILABLE = True
+except ImportError:
+    GPIOZERO_AVAILABLE = False
+
+try:
+    import RPi.GPIO as GPIO
+    RPI_GPIO_AVAILABLE = True
+except (ImportError, RuntimeError):
+    # RPi.GPIO raises RuntimeError when imported on non-Raspberry-Pi hardware.
+    RPI_GPIO_AVAILABLE = False
+
+if not (GPIOZERO_AVAILABLE or RPI_GPIO_AVAILABLE):
+    print("ERROR: Neither gpiozero nor RPi.GPIO could be imported")
+    print("Install with: pip3 install gpiozero RPi.GPIO")
+    sys.exit(1)
+
+
+def test_gpio_gpiozero(pin):
+    """Test GPIO with gpiozero library.
+
+    Drives `pin` high for one second and then low through an output device.
+
+    Returns:
+        bool: True when every step succeeded, False on any error.
+    """
+    print(f"Testing GPIO pin {pin} with gpiozero...")
+    try:
+        line = DigitalOutputDevice(pin, initial_value=False)
+        try:
+            print(f"✓ Output device created for pin {pin}")
+
+            # Test set high
+            print(f"Setting pin {pin} HIGH (simulating button press)...")
+            line.on()
+            time.sleep(1)
+            print("✓ Pin set HIGH")
+
+            # Test set low
+            print(f"Setting pin {pin} LOW (releasing button)...")
+            line.off()
+            time.sleep(0.5)
+            print("✓ Pin set LOW")
+        finally:
+            # Release the pin so a later RPi.GPIO attempt can claim it.
+            line.close()
+
+        print("✓ GPIO test successful with gpiozero\n")
+        return True
+    except Exception as e:
+        print(f"✗ Error with gpiozero: {e}\n")
+        return False
+
+
+def test_gpio_rpi(pin):
+    """Test GPIO with RPi.GPIO library.
+
+    Drives `pin` high for one second and then low, always cleaning the pin up.
+
+    Returns:
+        bool: True when every step succeeded, False on any error.
+    """
+    print(f"Testing GPIO pin {pin} with RPi.GPIO...")
+    try:
+        GPIO.setmode(GPIO.BCM)
+        GPIO.setup(pin, GPIO.OUT, initial=GPIO.LOW)
+        try:
+            print(f"✓ GPIO pin {pin} configured")
+
+            # Test set high
+            print(f"Setting pin {pin} HIGH (simulating button press)...")
+            GPIO.output(pin, GPIO.HIGH)
+            time.sleep(1)
+            print("✓ Pin set HIGH")
+
+            # Test set low
+            print(f"Setting pin {pin} LOW (releasing button)...")
+            GPIO.output(pin, GPIO.LOW)
+            time.sleep(0.5)
+            print("✓ Pin set LOW")
+        finally:
+            GPIO.cleanup(pin)
+
+        print("✓ GPIO test successful with RPi.GPIO\n")
+        return True
+    except Exception as e:
+        print(f"✗ Error with RPi.GPIO: {e}\n")
+        return False
+
+
+def main():
+    """Ask for confirmation, pulse the configured pin, and exit 0 on success."""
+    print("=" * 60)
+    print("PiKVM Power Button GPIO Test Utility")
+    print("=" * 60)
+    print()
+
+    # Get GPIO pin from environment or default
+    gpio_pin = int(os.getenv("POWER_BUTTON_GPIO", 17))
+    print(f"Testing GPIO pin: {gpio_pin}")
+    print("This will briefly activate the GPIO pin (1 second).")
+    print("Make sure this is safe before proceeding!\n")
+
+    response = input("Continue? (yes/no): ").strip().lower()
+    if response not in ['yes', 'y']:
+        print("Cancelled.")
+        sys.exit(0)
+
+    print("\n" + "=" * 60)
+
+    # Try gpiozero first
+    if GPIOZERO_AVAILABLE and test_gpio_gpiozero(gpio_pin):
+        print("✓ GPIO is working correctly with gpiozero")
+        print("\nYou can use this pin in your .env configuration:")
+        print(f"  POWER_BUTTON_GPIO={gpio_pin}")
+        sys.exit(0)
+
+    # Fallback to RPi.GPIO
+    if RPI_GPIO_AVAILABLE:
+        print("Trying RPi.GPIO fallback...\n")
+        if test_gpio_rpi(gpio_pin):
+            print("✓ GPIO is working correctly with RPi.GPIO")
+            print("\nYou can use this pin in your .env configuration:")
+            print(f"  POWER_BUTTON_GPIO={gpio_pin}")
+            sys.exit(0)
+
+    print("\n✗ GPIO test failed")
+    print("\nTroubleshooting:")
+    print("1. Verify the GPIO pin number is correct for your PiKVM")
+    print("2. Check that the GPIO pins are not already in use")
+    print("3. Ensure you're running with appropriate permissions (sudo)")
+    print("4. Check PiKVM documentation for correct pin configuration")
+    sys.exit(1)
+
+
+if __name__ == "__main__":
+    main()