#!/usr/bin/env python3
"""WireGuard VPN + net-tools fleet system tray app.

This is the UI for the private wg1 mesh (net-tools) that lets us reach
internal-only tools (MCPs on DO lime:3910+, quinn.api INTERNAL on lime:3030,
forge, etc.) without any public listeners or ports. All those services bind
only to 10.9.0.0/24 (or lo) + ufw rules; plum joins via this tray (or
`net up`) and talks to lime.yg etc. The hub is still yuzu in Iceland; DO
droplets (lime, redroid) are spokes on the same mesh.

Tunnel state drives the icon; the menu shows:

- the live fleet view from agent-status.json (location, route, agent
  freshness)
- the *active fleet config* (Fleet: line) derived from dx.hide_homelan in
  data/mesh-hosts.json — "DO cloud-only (homelan hidden)" is the normal new
  config; the homelan hosts (pear/apricot) are preserved in the json only for
  one-day recovery and are filtered from labels/criticals when hidden.
- a toggle action so the switch between configs is one click and obvious.

"pear" is no longer an unconditional critical peer (that made the icon stay
orange after the homelan died). The hosts that gate green vs. orange are the
core hosts of the active config — lime, plus pear only while the homelan is
un-hidden — and their addresses are resolved through mesh-hosts.json and the
agent snapshot.
"""

from __future__ import annotations

import json
import os
import re
import subprocess
import sys
import time
from pathlib import Path

# Add local lilith_tray to path
sys.path.insert(0, str(Path(__file__).parent))

from lilith_tray import TrayApp, TrayConfig, TrayIcon, TrayMenuItem  # noqa: E402

# The wg1 mesh lives on 10.9.0.0/24. macOS assigns the WireGuard utun number
# dynamically (utun4 on one boot, utun6 on another), so the tunnel interface is
# identified by the address it carries, never by a hardcoded name.
MESH_PREFIX = "10.9.0."

# Mesh hub (yuzu/quinn-vps) — answering pings proves the tunnel is actually
# carrying traffic, not merely configured.
MESH_HUB = "10.9.0.1"

# The net-tools agent's per-cycle snapshot (written by smart-lan-router.py).
AGENT_STATUS = Path(__file__).resolve().parent.parent / "data" / "agent-status.json"
MESH_DATA = Path(__file__).resolve().parent.parent / "data" / "mesh-hosts.json"
AGENT_STALE_SEC = 90

TRAY_LABEL = "com.wireguard.vpn-tray"

# Core hosts that must answer pings for a green icon. Which tuple applies is
# chosen by dx.hide_homelan in mesh-hosts.json: with the homelan hidden only
# lime is required, so pear can never turn the icon orange; with the homelan
# un-hidden (recovery) pear is required as well. Other hosts (redroid, ...) are
# listed in the menu but never affect the icon.
CORE_HOSTS_CLOUD = ("lime",)
CORE_HOSTS_WITH_HOMELAN = ("lime", "pear")

# Header line of one `ifconfig` interface stanza, e.g. "utun4: flags=8051<...>".
_IFACE_HEADER_RE = re.compile(r"^(\w+):\s+flags=")


class VPNTray(TrayApp):
    """WireGuard VPN tray application."""

    WG_CONF = Path.home() / ".wireguard" / "wg1.conf"

    def __init__(self) -> None:
        """Build the tray config (icons, menu, 5 s poll) and initial state."""
        icons_dir = Path(__file__).parent / "icons"
        config = TrayConfig(
            name="WireGuard VPN",
            icons={
                "connected": TrayIcon.from_file(icons_dir / "vpn-green-18@2x.png"),
                "disconnected": TrayIcon.from_file(icons_dir / "vpn-red-18@2x.png"),
                "connecting": TrayIcon.from_file(icons_dir / "vpn-yellow-18@2x.png"),
                "degraded": TrayIcon.from_file(icons_dir / "vpn-orange-18@2x.png"),
            },
            initial_icon="disconnected",
            menu=[
                TrayMenuItem.action("Connect", self._connect),
                TrayMenuItem.action("Disconnect", self._disconnect),
                TrayMenuItem.separator(),
                # The switcher: makes the active "config" (DO cloud vs optional
                # homelan recovery data) explicit in the tray and lets you flip
                # without hand-editing mesh-hosts.json + re-running renders.
                TrayMenuItem.action(
                    "Toggle homelan visibility (recovery)",
                    self._toggle_homelan_visibility,
                ),
                TrayMenuItem.separator(),
                TrayMenuItem.quit("Quit", before=self._unload_launch_agent),
            ],
            poll_interval=5,
        )
        # Single source of truth: poll_status() refreshes these and
        # get_status_labels() reads them, so the icon and the "Status" label
        # are always derived from the same poll.
        self._state: str = "disconnected"
        self._ip: str | None = None
        self._degraded: list[str] = []  # critical peers currently unreachable
        super().__init__(config)

    # ------------------------------------------------------------------
    # mesh-hosts.json / agent-status.json access
    #
    # Both files are small and are re-read on demand, so a toggle is visible
    # immediately without restarting the tray process.
    # ------------------------------------------------------------------

    def _load_mesh_data(self) -> dict:
        """Load mesh-hosts.json (SoT for hosts, wg addrs, and dx.hide_homelan).

        Returns an empty dict when the file is missing, unreadable, not valid
        UTF-8/JSON, or when its top-level value is not an object.
        """
        try:
            with open(MESH_DATA, encoding="utf-8") as fh:
                data = json.load(fh)
        except (OSError, ValueError):
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
            return {}
        return data if isinstance(data, dict) else {}

    def _agent_status(self) -> dict | None:
        """The net-tools agent's last snapshot, or None if absent/unparseable.

        A snapshot whose top-level JSON value is not an object is treated as
        unparseable.
        """
        try:
            with open(AGENT_STATUS, encoding="utf-8") as fh:
                data = json.load(fh)
        except (OSError, ValueError):
            return None
        return data if isinstance(data, dict) else None

    @staticmethod
    def _hide_homelan(data: dict) -> bool:
        """Return dx.hide_homelan from already-loaded mesh data (default False)."""
        dx = data.get("dx")
        if not isinstance(dx, dict):
            return False
        return bool(dx.get("hide_homelan", False))

    @staticmethod
    def _named_hosts(data: dict) -> list[dict]:
        """Host entries from mesh data that are objects with a string "name".

        Malformed entries are skipped so one bad record cannot break a poll.
        """
        hosts = data.get("hosts")
        if not isinstance(hosts, list):
            return []
        return [
            h for h in hosts
            if isinstance(h, dict) and isinstance(h.get("name"), str)
        ]

    @staticmethod
    def _discovered(agent: dict | None) -> dict:
        """The "discovered" name -> address map of an agent snapshot, or {}."""
        found = agent.get("discovered") if isinstance(agent, dict) else None
        return found if isinstance(found, dict) else {}

    def _is_homelan_hidden(self) -> bool:
        """True when mesh-hosts.json currently has dx.hide_homelan set."""
        return self._hide_homelan(self._load_mesh_data())

    def _visible_host_names(self, data: dict | None = None) -> set[str]:
        """Names (canonical + aliases) that are part of the active fleet config.

        Args:
            data: Already-loaded mesh data; when omitted the file is read.

        Returns:
            All host names and aliases, excluding hosts whose "class" is not
            "cloud" while the homelan is hidden.
        """
        if data is None:
            data = self._load_mesh_data()
        hide = self._hide_homelan(data)
        names: set[str] = set()
        for host in self._named_hosts(data):
            if hide and host.get("class") != "cloud":
                continue
            names.add(host["name"])
            names.update(a for a in (host.get("aliases") or []) if isinstance(a, str))
        return names

    def _core_hosts(self) -> list[tuple[str, str]]:
        """(name, address) of each core host of the active config.

        The address preference is: the agent's discovered address, then the
        host's "lan" entry, then its "wg" entry. Core hosts without any usable
        address are omitted.
        """
        data = self._load_mesh_data()
        core = CORE_HOSTS_CLOUD if self._hide_homelan(data) else CORE_HOSTS_WITH_HOMELAN
        discovered = self._discovered(self._agent_status())
        pairs: list[tuple[str, str]] = []
        for host in self._named_hosts(data):
            name = host["name"]
            if name not in core:
                continue
            ip = discovered.get(name) or host.get("lan") or host.get("wg")
            if isinstance(ip, str) and ip:
                pairs.append((name, ip))
        return pairs

    def _critical_ips(self) -> list[str]:
        """IPs of the *core* hosts for the active config (lime for DO tools
        access; homelan pear only when explicitly un-hidden).

        Other cloud nodes (redroid) are shown in the menu but do not affect the
        green/degraded icon. Duplicates are removed, first occurrence wins.
        """
        return list(dict.fromkeys(ip for _, ip in self._core_hosts()))

    # ------------------------------------------------------------------
    # Tunnel / reachability probes
    # ------------------------------------------------------------------

    def _wg_interface(self) -> tuple[str, str] | None:
        """Return (interface, ip) of the live WireGuard mesh tunnel, or None.

        The tunnel is whichever interface carries a 10.9.0.x address on an
        ``inet`` line — the utun number itself is assigned dynamically by macOS
        and must not be hardcoded. None is also returned when ``ifconfig``
        cannot be run, times out, or exits non-zero.
        """
        try:
            result = subprocess.run(
                ["ifconfig"],
                capture_output=True,
                text=True,
                timeout=5,
            )
        except (subprocess.TimeoutExpired, OSError):
            return None
        if result.returncode != 0:
            return None

        current: str | None = None
        for line in result.stdout.splitlines():
            header = _IFACE_HEADER_RE.match(line)
            if header:
                current = header.group(1)
                continue
            fields = line.split()
            # "inet <addr> ..." — the exact match on "inet" skips inet6 lines.
            if (
                current
                and len(fields) >= 2
                and fields[0] == "inet"
                and fields[1].startswith(MESH_PREFIX)
            ):
                return current, fields[1]
        return None

    @staticmethod
    def _ping(host: str, wait_ms: str = "1000") -> bool:
        """One ICMP echo to ``host``; True on reply.

        ``wait_ms`` is the per-probe timeout (macOS ping -W is milliseconds).
        A ping that cannot be launched or exceeds the 5 s hard limit counts as
        no reply.
        """
        try:
            result = subprocess.run(
                ["ping", "-c", "1", "-W", wait_ms, host],
                capture_output=True,
                timeout=5,
            )
        except (subprocess.TimeoutExpired, OSError):
            return False
        return result.returncode == 0

    def _can_reach_vpn(self) -> bool:
        """Check if we can reach the VPN hub (proves the tunnel carries traffic)."""
        return self._ping(MESH_HUB, "2000")

    def _degraded_peers(self) -> list[str]:
        """Names of *core* critical hosts (per active fleet config) that are
        unreachable.

        Only lime (the DO host with our private MCPs/tools) is required for
        green when using the new DO config. Redroid and other clouds are
        informational only in the menu. When homelan is un-hidden for recovery
        we also require pear. A core host with no known address is not probed
        and therefore never reported here.
        """
        return [
            name for name, ip in self._core_hosts()
            if not self._ping(ip, "1500")
        ]

    def poll_status(self) -> str:
        """Refresh VPN state and return the matching icon key.

        Both the tray icon and the menu labels derive from the state computed
        here, so they always agree:

        - no tunnel interface                           -> "disconnected" (red)
        - tunnel up, hub unreachable                    -> "connecting" (yellow)
        - tunnel up, hub ok, active-config peer down    -> "degraded" (orange)
        - tunnel up, hub + active config peers reachable -> "connected" (green)

        The "active config" peers are the core hosts selected by the current
        dx.hide_homelan in mesh-hosts.json.
        """
        interface = self._wg_interface()
        if interface is None:
            self._state = "disconnected"
            self._ip = None
            self._degraded = []
            return self._state

        _, self._ip = interface
        if not self._can_reach_vpn():
            self._state = "connecting"
            self._degraded = []
        else:
            self._degraded = self._degraded_peers()
            self._state = "degraded" if self._degraded else "connected"
        return self._state

    def get_status_labels(self) -> dict[str, str]:
        """Menu labels: tunnel state + fleet view from agent snapshot + the
        active DO/homelan fleet config (from dx.hide_homelan in
        mesh-hosts.json).

        Label order is: Status/IP/Unreachable, Fleet, Agent, Mode, discovered
        hosts, declared hosts, Repo. Hosts that are not part of the active
        config are filtered out so the tray is not noisy. The declared wg
        hosts are listed even when no agent snapshot is available.
        """
        labels: dict[str, str]
        if self._state == "connected":
            labels = {"Status": "Connected"}
            if self._ip:
                labels["IP"] = self._ip
        elif self._state == "degraded":
            labels = {"Status": "Degraded — mesh partial"}
            if self._ip:
                labels["IP"] = self._ip
            if self._degraded:
                labels["Unreachable"] = ", ".join(self._degraded)
        elif self._state == "connecting":
            labels = {"Status": "Connecting..."}
        else:
            labels = {"Status": "Disconnected"}

        # Load once so every label below reflects the same file contents.
        mesh = self._load_mesh_data()
        hide = self._hide_homelan(mesh)
        labels["Fleet"] = (
            "DO cloud-only (homelan hidden)"
            if hide
            else "Full (homelan + cloud visible — recovery)"
        )

        agent = self._agent_status()
        if agent is None:
            labels["Agent"] = "no status"
        else:
            try:
                ts = int(agent.get("ts", 0))
            except (TypeError, ValueError, OverflowError):
                ts = 0  # malformed timestamp: treat like a missing one (stale)
            age = int(time.time()) - ts
            labels["Agent"] = (
                f"stale {age}s" if age > AGENT_STALE_SEC else f"ok ({age}s ago)"
            )
            if agent.get("location"):
                via = agent.get("lan_route_via") or "?"
                labels["Mode"] = f"{agent['location']} via {via}"
            visible = self._visible_host_names(mesh)
            for name, ip in sorted(self._discovered(agent).items()):
                if name in visible and isinstance(ip, str) and ip:
                    labels[name] = ip

        # Always list the active config's declared wg hosts (lime, yuzu,
        # redroid etc for DO; plus homelan when un-hidden). A discovered
        # address already recorded above wins over the declared wg address.
        for host in self._named_hosts(mesh):
            if hide and host.get("class") != "cloud":
                continue
            name = host["name"]
            wg = host.get("wg")
            if name not in labels and isinstance(wg, str) and wg:
                labels[name] = wg

        if agent is not None and agent.get("head"):
            labels["Repo"] = agent["head"]
        return labels

    # ------------------------------------------------------------------
    # Menu actions
    # ------------------------------------------------------------------

    def _unload_launch_agent(self) -> None:
        """Quit hook: drop the .tray-disabled flag and boot out the launch agent.

        The flag file is created under data/ next to the agent snapshot; per
        the original note, the fleet agent will not respawn the tray until
        install-tray clears the flag. ``launchctl bootout`` is then tried with
        both the service target and the plist path; failures are ignored.
        """
        flag = Path(__file__).resolve().parent.parent / "data" / ".tray-disabled"
        flag.parent.mkdir(parents=True, exist_ok=True)
        flag.touch()
        uid = os.getuid()
        plist = Path.home() / "Library" / "LaunchAgents" / f"{TRAY_LABEL}.plist"
        for target in (f"gui/{uid}/{TRAY_LABEL}", str(plist)):
            subprocess.run(["launchctl", "bootout", target], capture_output=True)

    def _run_wg_quick(self, verb: str) -> str | None:
        """Run ``wg-quick <verb> <WG_CONF>`` as root via an osascript prompt.

        osascript is used so macOS shows its GUI password dialog.

        Args:
            verb: The wg-quick sub-command ("up" or "down").

        Returns:
            None on success, otherwise a short description of the failure
            (last line of osascript's stderr, or the launch error).
        """
        # Escape the path for an AppleScript string literal, then let
        # AppleScript's `quoted form of` do the shell quoting, so a config
        # path containing spaces or quotes cannot split the command.
        conf = str(self.WG_CONF).replace("\\", "\\\\").replace('"', '\\"')
        script = (
            f'do shell script ("wg-quick {verb} " & quoted form of "{conf}") '
            "with administrator privileges"
        )
        try:
            result = subprocess.run(
                ["osascript", "-e", script], capture_output=True, text=True
            )
        except OSError as e:
            return str(e)
        if result.returncode == 0:
            return None
        stderr_lines = (result.stderr or "").strip().splitlines()
        return stderr_lines[-1] if stderr_lines else f"exit {result.returncode}"

    def _connect(self) -> None:
        """Connect to VPN.

        Does nothing but notify when the tunnel interface already exists. On
        failure (including a cancelled password prompt) the user is notified
        and the icon is re-polled so it does not stay on "connecting".
        """
        if self._wg_interface() is not None:
            self.notify("VPN", "Already connected")
            return

        self.set_icon("connecting")
        error = self._run_wg_quick("up")
        if error is not None:
            self.notify("VPN", f"Connect failed: {error}")
            self.set_icon(self.poll_status())

    def _disconnect(self) -> None:
        """Disconnect from VPN.

        wg-quick down takes the config path and resolves the real (dynamic)
        utun name itself. Failures are reported through a notification.
        """
        if self._wg_interface() is None:
            self.notify("VPN", "Already disconnected")
            return

        error = self._run_wg_quick("down")
        if error is not None:
            self.notify("VPN", f"Disconnect failed: {error}")

    def _toggle_homelan_visibility(self) -> None:
        """Switch the active fleet config between the new DO cloud setup and
        the optional homelan (for one-day recovery).

        Flips dx.hide_homelan in mesh-hosts.json, runs the two render helpers
        (bin/mesh-hosts-render --install under sudo, bin/host-apply
        --ssh-apply), notifies the user, and refreshes the icon and labels
        immediately. The change is local; commit + push so the rest of the
        fleet converges.

        The JSON is written to a sibling temp file and moved into place with
        os.replace, so a failed write cannot leave a truncated source of
        truth. Render failures do not undo the toggle; they are named in the
        notification.
        """
        mesh_path = MESH_DATA
        try:
            with open(mesh_path, encoding="utf-8") as fh:
                data = json.load(fh)
        except (OSError, ValueError) as e:
            self.notify("Fleet config", f"Read failed: {e}")
            return
        if not isinstance(data, dict):
            self.notify("Fleet config", "Read failed: top-level JSON value is not an object")
            return

        dx = data.get("dx")
        if not isinstance(dx, dict):
            dx = {}
            data["dx"] = dx
        was_hidden = bool(dx.get("hide_homelan", False))
        dx["hide_homelan"] = not was_hidden

        # Serialize first, then write-and-rename: the live file is only ever
        # replaced by a complete document.
        payload = json.dumps(data, indent=2, ensure_ascii=False) + "\n"
        target = Path(os.path.realpath(mesh_path))  # keep a symlinked SoT a symlink
        tmp_path = target.with_name(target.name + ".tmp")
        try:
            tmp_path.write_text(payload, encoding="utf-8")
            os.chmod(tmp_path, target.stat().st_mode & 0o777)
            os.replace(tmp_path, target)
        except OSError as e:
            try:
                tmp_path.unlink()
            except OSError:
                pass
            self.notify("Fleet config", f"Write failed: {e}")
            return

        root = mesh_path.parent.parent
        mhr = root / "bin" / "mesh-hosts-render"
        ha = root / "bin" / "host-apply"

        # Re-render (mesh-hosts-render writes /etc/hosts — needs sudo; ssh is
        # user). The steps are independent, so a failure of one must not skip
        # the other.
        failed: list[str] = []
        for label, cmd, limit in (
            ("mesh-hosts-render", ["sudo", str(mhr), "--install"], 30),
            ("host-apply", [str(ha), "--ssh-apply"], 15),
        ):
            try:
                result = subprocess.run(
                    cmd, check=False, capture_output=True, timeout=limit
                )
            except (OSError, subprocess.SubprocessError):
                failed.append(label)
                continue
            if result.returncode != 0:
                failed.append(label)

        new_mode = (
            "DO cloud-only (homelan hidden)"
            if dx["hide_homelan"]
            else "Full (homelan visible)"
        )
        message = (
            f"Switched to {new_mode}. "
            "(git commit the json change so fleet pulls it.)"
        )
        if failed:
            message += f" Re-render failed: {', '.join(failed)} — run manually."
        self.notify("Fleet config", message)

        # Refresh icon/labels right now
        self.set_icon(self.poll_status())
        self.set_status_labels(self.get_status_labels())


def main() -> None:
    """Create the tray app and run it."""
    app = VPNTray()
    app.run()


if __name__ == "__main__":
    main()