- derive visible hosts, Fleet label, and critical peers from dx.hide_homelan in mesh-hosts.json - "Toggle homelan visibility (recovery)" menu action + immediate re-render of /etc/hosts+ssh - only lime (DO backend with MCPs/tools) is core critical for the icon; redroid is shown but does not degrade - stale homelan (pear etc) filtered from tray labels when using the new DO config (hide=true) - tray README and module docs updated; the switcher makes the active config obvious in the menu bar - this is the private path for MCPs on DO (and other internal tools): wg mesh (yuzu hub + lime spoke) + net-tools names, no public exposure .
390 lines
16 KiB
Python
Executable file
390 lines
16 KiB
Python
Executable file
#!/usr/bin/env python3
|
|
"""WireGuard VPN + net-tools fleet system tray app.
|
|
|
|
This is the UI for the private wg1 mesh (net-tools) that lets us reach
|
|
internal-only tools (MCPs on DO lime:3910+, quinn.api INTERNAL on lime:3030,
|
|
forge, etc.) without any public listeners or ports. All those services bind
|
|
only to 10.9.0.0/24 (or lo) + ufw rules; plum joins via this tray (or `net up`)
|
|
and talks to lime.yg etc. The hub is still yuzu in Iceland; DO droplets (lime,
|
|
redroid) are spokes on the same mesh.
|
|
|
|
Tunnel state drives the icon; the menu shows:
|
|
- the live fleet view from agent-status.json (location, route, agent freshness)
|
|
- the *active fleet config* (Fleet: line) derived from dx.hide_homelan in
|
|
data/mesh-hosts.json — "DO cloud-only (homelan hidden)" is the normal new
|
|
config; the homelan hosts (pear/apricot) are preserved in the json only for
|
|
one-day recovery and are filtered from labels/criticals when hidden.
|
|
- a toggle action so the switch between configs is one click and obvious.
|
|
|
|
No more hard-coded "pear" critical peer (that made the icon stay orange after
|
|
the homelan died). Critical reachability is derived from the active hosts in
|
|
the json so the icon goes green exactly when the DO mesh (lime etc.) is healthy.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
import os
|
|
import re
|
|
import subprocess
|
|
import sys
|
|
import time
|
|
from pathlib import Path
|
|
|
|
# Add local lilith_tray to path
|
|
sys.path.insert(0, str(Path(__file__).parent))
|
|
|
|
from lilith_tray import TrayApp, TrayConfig, TrayIcon, TrayMenuItem
|
|
|
|
# wg1 mesh subnet: 10.9.0.0/24. The WireGuard utun number is assigned
# dynamically by macOS (utun4 on one boot, utun6 on another), so the tunnel
# interface is recognised by the address it carries, never by a fixed name.
MESH_PREFIX = "10.9.0."

# Mesh hub (yuzu/quinn-vps). A ping reply from it proves the tunnel is
# actually carrying traffic, not merely configured.
MESH_HUB = "10.9.0.1"

# Repo-level data directory (sibling of this script's directory).
_DATA_DIR = Path(__file__).resolve().parent.parent / "data"

# Per-cycle snapshot written by the net-tools agent (smart-lan-router.py).
AGENT_STATUS = _DATA_DIR / "agent-status.json"

# Hosts, wg addresses and the dx.hide_homelan switch.
MESH_DATA = _DATA_DIR / "mesh-hosts.json"

# An agent snapshot older than this many seconds is labelled "stale".
AGENT_STALE_SEC = 90

# launchd label of the tray; also the stem of its LaunchAgents plist.
TRAY_LABEL = "com.wireguard.vpn-tray"
|
|
# Critical peers are no longer a hardcoded homelan name. We derive the set of
|
|
# hosts that must be reachable (for green vs degraded) from mesh-hosts.json,
|
|
# respecting dx.hide_homelan. This lets the tray reflect the *active fleet
|
|
# config*: the new DO cloud setup (lime + redroid + yuzu over wg) vs the
|
|
# optional homelan (apricot/pear) kept only for future recovery.
|
|
# When hide=true only lime (the core cloud host) is required; pear etc. never cause orange.
|
|
|
|
|
|
class VPNTray(TrayApp):
    """WireGuard VPN tray application.

    ``poll_status()`` computes the tunnel state and caches it on the instance;
    ``get_status_labels()`` renders that cached state plus fleet details as
    menu labels. mesh-hosts.json (hosts + ``dx.hide_homelan``) and the agent
    snapshot are re-read from disk on every call, so edits to those files are
    picked up without restarting the tray process.
    """

    WG_CONF = Path.home() / ".wireguard" / "wg1.conf"

    # First line of an ifconfig interface stanza, e.g. "utun4: flags=8051<...>".
    _IFACE_HEADER = re.compile(r"^(\w+):\s+flags=")

    # Hosts whose reachability decides green vs. orange, per fleet config.
    _CORE_HIDDEN = ("lime",)
    _CORE_FULL = ("lime", "pear")

    def __init__(self) -> None:
        icons_dir = Path(__file__).parent / "icons"
        config = TrayConfig(
            name="WireGuard VPN",
            icons={
                "connected": TrayIcon.from_file(icons_dir / "vpn-green-18@2x.png"),
                "disconnected": TrayIcon.from_file(icons_dir / "vpn-red-18@2x.png"),
                "connecting": TrayIcon.from_file(icons_dir / "vpn-yellow-18@2x.png"),
                "degraded": TrayIcon.from_file(icons_dir / "vpn-orange-18@2x.png"),
            },
            initial_icon="disconnected",
            menu=[
                TrayMenuItem.action("Connect", self._connect),
                TrayMenuItem.action("Disconnect", self._disconnect),
                TrayMenuItem.separator(),
                # The switcher: makes the active "config" (DO cloud vs optional
                # homelan recovery data) explicit in the tray and lets you flip
                # without hand-editing mesh-hosts.json + re-running renders.
                TrayMenuItem.action(
                    "Toggle homelan visibility (recovery)",
                    self._toggle_homelan_visibility,
                ),
                TrayMenuItem.separator(),
                TrayMenuItem.quit("Quit", before=self._unload_launch_agent),
            ],
            poll_interval=5,
        )
        # Single source of truth: poll_status() refreshes these and
        # get_status_labels() reads them, so icon and "Status" label agree as
        # long as poll_status() runs first.
        self._state: str = "disconnected"
        self._ip: str | None = None
        self._degraded: list[str] = []  # critical peers currently unreachable
        super().__init__(config)

    # ------------------------------------------------------------------
    # JSON sources (re-read on demand so a toggle is visible immediately)
    # ------------------------------------------------------------------

    @staticmethod
    def _read_json_object(path: Path) -> dict | None:
        """Parse ``path`` as a JSON object.

        Returns None when the file is missing/unreadable, is not valid UTF-8
        JSON, or its top-level value is not an object. Callers use ``.get``
        on the result, so a list or scalar must not leak through.
        """
        try:
            with open(path, encoding="utf-8") as fh:
                data = json.load(fh)
        except (OSError, ValueError):
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
            return None
        return data if isinstance(data, dict) else None

    def _load_mesh_data(self) -> dict:
        """Load mesh-hosts.json (SoT for hosts, wg addrs, and dx.hide_homelan).

        Returns an empty dict when the file cannot be used.
        """
        return self._read_json_object(MESH_DATA) or {}

    def _agent_status(self) -> dict | None:
        """The net-tools agent's last snapshot, or None if absent/unparseable."""
        return self._read_json_object(AGENT_STATUS)

    # ------------------------------------------------------------------
    # Pure helpers over one already-loaded mesh snapshot
    # ------------------------------------------------------------------

    @staticmethod
    def _hide_flag(mesh: dict) -> bool:
        """Value of ``dx.hide_homelan`` in ``mesh``; False when absent/malformed."""
        dx = mesh.get("dx")
        return bool(dx.get("hide_homelan", False)) if isinstance(dx, dict) else False

    @staticmethod
    def _hosts(mesh: dict) -> list[dict]:
        """Host entries of ``mesh`` that are objects with a non-empty string name."""
        hosts = mesh.get("hosts")
        if not isinstance(hosts, list):
            return []
        return [
            h for h in hosts
            if isinstance(h, dict) and isinstance(h.get("name"), str) and h["name"]
        ]

    @classmethod
    def _active_hosts(cls, mesh: dict) -> list[dict]:
        """Hosts in the active fleet config: only class "cloud" when homelan is hidden."""
        hide = cls._hide_flag(mesh)
        return [h for h in cls._hosts(mesh) if not hide or h.get("class") == "cloud"]

    @staticmethod
    def _names_of(hosts: list[dict]) -> set[str]:
        """Canonical names plus string aliases of ``hosts``."""
        names: set[str] = set()
        for host in hosts:
            names.add(host["name"])
            aliases = host.get("aliases")
            if isinstance(aliases, list):
                names.update(a for a in aliases if isinstance(a, str))
        return names

    def _core_targets(self) -> list[tuple[str, str]]:
        """(name, ip) for each core host of the active config that has an address.

        Address preference: agent-discovered address, then the host's ``lan``
        entry, then its ``wg`` entry. The hide flag and the host list come from
        the same mesh-hosts.json read.
        """
        mesh = self._load_mesh_data()
        core = self._CORE_HIDDEN if self._hide_flag(mesh) else self._CORE_FULL
        discovered = (self._agent_status() or {}).get("discovered")
        if not isinstance(discovered, dict):
            discovered = {}
        targets: list[tuple[str, str]] = []
        for host in self._hosts(mesh):
            name = host["name"]
            if name not in core:
                continue
            for candidate in (discovered.get(name), host.get("lan"), host.get("wg")):
                if isinstance(candidate, str) and candidate:
                    targets.append((name, candidate))
                    break
        return targets

    # ------------------------------------------------------------------
    # Fleet-config queries
    # ------------------------------------------------------------------

    def _is_homelan_hidden(self) -> bool:
        """True when mesh-hosts.json currently has ``dx.hide_homelan`` set."""
        return self._hide_flag(self._load_mesh_data())

    def _visible_host_names(self) -> set[str]:
        """Names (canonical + aliases) that are part of the active fleet config."""
        return self._names_of(self._active_hosts(self._load_mesh_data()))

    def _critical_ips(self) -> list[str]:
        """IPs of the *core* hosts for the active config, de-duplicated in order.

        Core means lime when homelan is hidden, lime and pear otherwise. Other
        cloud nodes (redroid) are shown in the menu but do not affect the
        green/degraded icon.
        """
        return list(dict.fromkeys(ip for _, ip in self._core_targets()))

    # ------------------------------------------------------------------
    # Probes
    # ------------------------------------------------------------------

    def _wg_interface(self) -> tuple[str, str] | None:
        """Return (interface, ip) of the live WireGuard mesh tunnel, or None.

        The tunnel is whichever interface carries a 10.9.0.x address on an
        ``inet`` line — the utun number itself is assigned dynamically by macOS
        and must not be hardcoded. None is also returned when ``ifconfig``
        cannot be run, times out, or exits non-zero.
        """
        try:
            result = subprocess.run(
                ["ifconfig"],
                capture_output=True,
                text=True,
                timeout=5,
            )
        except (subprocess.TimeoutExpired, OSError):
            return None
        if result.returncode != 0:
            return None

        current: str | None = None
        for line in result.stdout.splitlines():
            header = self._IFACE_HEADER.match(line)
            if header:
                current = header.group(1)
                continue
            if current is None:
                continue
            stripped = line.strip()
            if not stripped.startswith("inet "):
                continue
            fields = stripped.split()
            # fields[1] is the address itself; a peer/dest address elsewhere on
            # the line must not count as "this interface carries the mesh IP".
            if len(fields) > 1 and fields[1].startswith(MESH_PREFIX):
                return current, fields[1]
        return None

    @staticmethod
    def _ping(host: str, wait_ms: str = "1000") -> bool:
        """One ICMP echo to ``host``; True on reply.

        ``wait_ms`` is the per-probe timeout (macOS ping -W is milliseconds).
        Any failure to run ``ping`` counts as unreachable.
        """
        try:
            result = subprocess.run(
                ["ping", "-c", "1", "-W", wait_ms, host],
                capture_output=True,
                timeout=5,
            )
        except (subprocess.TimeoutExpired, OSError):
            return False
        return result.returncode == 0

    def _can_reach_vpn(self) -> bool:
        """Check if we can reach the VPN hub (proves the tunnel carries traffic)."""
        return self._ping(MESH_HUB, "2000")

    def _degraded_peers(self) -> list[str]:
        """Names of *core* critical hosts (per active fleet config) that are unreachable.

        Only lime is required for green when homelan is hidden; when homelan is
        un-hidden for recovery pear is required too. A core host with no known
        address is not probed and therefore never reported here.
        """
        return [name for name, ip in self._core_targets() if not self._ping(ip, "1500")]

    def poll_status(self) -> str:
        """Refresh VPN state and return the matching icon key.

        Both the tray icon and the menu labels derive from the state computed
        here, so they always agree:
          - no tunnel interface                        -> "disconnected" (red)
          - tunnel up, hub unreachable                 -> "connecting" (yellow)
          - tunnel up, hub ok, a core peer unreachable -> "degraded" (orange)
          - tunnel up, hub + core peers reachable      -> "connected" (green)
        """
        interface = self._wg_interface()
        if interface is None:
            self._state = "disconnected"
            self._ip = None
            self._degraded = []
            return self._state

        _, self._ip = interface
        if not self._can_reach_vpn():
            self._state = "connecting"
            self._degraded = []
            return self._state

        self._degraded = self._degraded_peers()
        self._state = "degraded" if self._degraded else "connected"
        return self._state

    # ------------------------------------------------------------------
    # Menu labels
    # ------------------------------------------------------------------

    def get_status_labels(self) -> dict[str, str]:
        """Menu labels: tunnel state, active fleet config, agent view, hosts.

        Insertion order is the display order: Status / IP / Unreachable, Fleet,
        Agent, Mode, agent-discovered hosts that belong to the active config,
        declared hosts (wg address) not already listed, Repo. Declared hosts
        are listed even when there is no agent snapshot.
        """
        if self._state == "connected":
            labels = {"Status": "Connected"}
        elif self._state == "degraded":
            labels = {"Status": "Degraded — mesh partial"}
        elif self._state == "connecting":
            labels = {"Status": "Connecting..."}
        else:
            labels = {"Status": "Disconnected"}
        if self._state in ("connected", "degraded") and self._ip:
            labels["IP"] = self._ip
        if self._state == "degraded" and self._degraded:
            labels["Unreachable"] = ", ".join(self._degraded)

        # One read of mesh-hosts.json feeds the Fleet line, the visibility
        # filter and the declared-host list, so they cannot disagree.
        mesh = self._load_mesh_data()
        hide = self._hide_flag(mesh)
        active = self._active_hosts(mesh)
        labels["Fleet"] = (
            "DO cloud-only (homelan hidden)"
            if hide
            else "Full (homelan + cloud visible — recovery)"
        )

        agent = self._agent_status()
        if agent is None:
            labels["Agent"] = "no status"
        else:
            try:
                ts = int(agent.get("ts", 0))
            except (TypeError, ValueError):
                ts = 0  # unparseable timestamp: treat like a missing one (stale)
            age = int(time.time()) - ts
            labels["Agent"] = (
                f"stale {age}s" if age > AGENT_STALE_SEC else f"ok ({age}s ago)"
            )
            if agent.get("location"):
                via = agent.get("lan_route_via") or "?"
                labels["Mode"] = f"{agent['location']} via {via}"

            discovered = agent.get("discovered")
            if isinstance(discovered, dict):
                visible = self._names_of(active)
                for name, ip in sorted(discovered.items()):
                    if name in visible:
                        labels[name] = str(ip)

        # Always list the active config's declared wg hosts; an address the
        # agent discovered for the same name (added above) takes precedence.
        for host in active:
            wg = host.get("wg")
            if wg and host["name"] not in labels:
                labels[host["name"]] = str(wg)

        if agent is not None and agent.get("head"):
            labels["Repo"] = str(agent["head"])
        return labels

    # ------------------------------------------------------------------
    # Menu actions
    # ------------------------------------------------------------------

    def _unload_launch_agent(self) -> None:
        """Mark the tray as disabled and boot its LaunchAgent out of launchd.

        Creates ``data/.tray-disabled`` (per the original note, the fleet agent
        will not respawn the tray until install-tray clears that flag), then
        asks launchctl to unload the service. All steps are best-effort so that
        Quit is never blocked by a filesystem or launchctl error.
        """
        flag = Path(__file__).resolve().parent.parent / "data" / ".tray-disabled"
        try:
            flag.parent.mkdir(parents=True, exist_ok=True)
            flag.touch()
        except OSError:
            pass  # still proceed with the unload below

        uid = os.getuid()
        plist = Path.home() / "Library" / "LaunchAgents" / f"{TRAY_LABEL}.plist"
        # Preferred form is the service target; the fallback names the domain
        # plus the plist path (bootout needs a domain target in that form).
        attempts = (
            ["launchctl", "bootout", f"gui/{uid}/{TRAY_LABEL}"],
            ["launchctl", "bootout", f"gui/{uid}", str(plist)],
        )
        for cmd in attempts:
            try:
                if subprocess.run(cmd, capture_output=True).returncode == 0:
                    break
            except OSError:
                break  # launchctl itself is unavailable; nothing more to try

    def _wg_quick(self, action: str) -> None:
        """Run ``wg-quick <action> <WG_CONF>`` as root via an osascript prompt.

        The config path is shell-quoted and the command is then escaped for
        the AppleScript string literal, so paths containing spaces or quotes
        work. Failures are surfaced through ``notify``; a user-cancelled
        password prompt (AppleScript error -128) is not reported.
        """
        import shlex  # local import: only needed for this action

        command = f"wg-quick {action} {shlex.quote(str(self.WG_CONF))}"
        escaped = command.replace("\\", "\\\\").replace('"', '\\"')
        script = f'do shell script "{escaped}" with administrator privileges'
        try:
            # No timeout: the password dialog waits on the user.
            result = subprocess.run(
                ["osascript", "-e", script], capture_output=True, text=True
            )
        except OSError as exc:
            self.notify("VPN", f"wg-quick {action} failed: {exc}")
            return
        stderr = (result.stderr or "").strip()
        if result.returncode != 0 and "-128" not in stderr:
            detail = stderr or f"exit status {result.returncode}"
            self.notify("VPN", f"wg-quick {action} failed: {detail}")

    def _connect(self) -> None:
        """Connect to VPN (no-op with a notification when already up)."""
        if self._wg_interface() is not None:
            self.notify("VPN", "Already connected")
            return

        self.set_icon("connecting")
        # osascript gives us the GUI password prompt.
        self._wg_quick("up")

    def _disconnect(self) -> None:
        """Disconnect from VPN (no-op with a notification when already down)."""
        if self._wg_interface() is None:
            self.notify("VPN", "Already disconnected")
            return

        # wg-quick down takes the config path and resolves the real (dynamic)
        # utun name itself.
        self._wg_quick("down")

    def _toggle_homelan_visibility(self) -> None:
        """Flip ``dx.hide_homelan`` in mesh-hosts.json and refresh everything.

        Steps: read the JSON, invert the flag, write it back atomically, run
        ``bin/mesh-hosts-render --install`` (via sudo) and
        ``bin/host-apply --ssh-apply``, notify, then re-poll so icon and labels
        reflect the new config immediately. Read/write errors abort with a
        notification and leave the file untouched; render failures are
        reported in the notification but do not undo the flag change.
        """
        mesh_path = MESH_DATA
        try:
            with open(mesh_path, encoding="utf-8") as fh:
                data = json.load(fh)
        except (OSError, ValueError) as exc:
            self.notify("Fleet config", f"Read failed: {exc}")
            return
        if not isinstance(data, dict):
            self.notify("Fleet config", "Read failed: top-level JSON value is not an object")
            return

        dx = data.get("dx")
        if not isinstance(dx, dict):
            dx = data["dx"] = {}
        dx["hide_homelan"] = not bool(dx.get("hide_homelan", False))

        # Write to a sibling temp file and rename over the original so a crash
        # or full disk can never leave a truncated source-of-truth file.
        tmp_path = mesh_path.with_name(mesh_path.name + ".tmp")
        try:
            with open(tmp_path, "w", encoding="utf-8") as fh:
                json.dump(data, fh, indent=2, ensure_ascii=False)
                fh.write("\n")
            os.replace(tmp_path, mesh_path)
        except (OSError, TypeError, ValueError) as exc:
            try:
                tmp_path.unlink()
            except OSError:
                pass
            self.notify("Fleet config", f"Write failed: {exc}")
            return

        root = mesh_path.parent.parent
        renders = (
            # mesh-hosts-render writes /etc/hosts — needs sudo; ssh is user.
            ("mesh-hosts-render", ["sudo", str(root / "bin" / "mesh-hosts-render"), "--install"], 30),
            ("host-apply", [str(root / "bin" / "host-apply"), "--ssh-apply"], 15),
        )
        failed: list[str] = []
        for label, cmd, timeout in renders:
            try:
                result = subprocess.run(
                    cmd, check=False, capture_output=True, timeout=timeout
                )
            except (OSError, subprocess.SubprocessError):
                failed.append(label)
                continue
            if result.returncode != 0:
                failed.append(label)

        new_mode = (
            "DO cloud-only (homelan hidden)"
            if dx["hide_homelan"]
            else "Full (homelan visible)"
        )
        message = f"Switched to {new_mode}. (git commit the json change so fleet pulls it.)"
        if failed:
            message += f" Re-render failed: {', '.join(failed)}."
        self.notify("Fleet config", message)

        # Refresh icon/labels right now
        self.set_icon(self.poll_status())
        self.set_status_labels(self.get_status_labels())
|
|
|
|
|
|
def main() -> None:
    """Create the VPN tray application and start it via ``run()``."""
    VPNTray().run()


if __name__ == "__main__":
    main()
|