atlus/backend/routers/display.py
roberts a5a7b01fd9 Complete rewrite of input forwarding — fix mouse and keyboard events
Root causes fixed:
- _focused was a stray class annotation in @dataclass, causing field
  ordering issues — moved to proper dataclass field
- xdotool type --window WID not supported on all versions — removed
  --window flag, use focused window instead
- xdotool commands with --window may fail silently — switched to
  absolute coordinates (window is at 0,0 filling the display)
- All xdotool errors were silently swallowed — now logged with stderr

Mouse events:
- Use absolute mousemove + click (no --window) since window fills display
- Separate mousemove and click into two calls for reliability
- Fire-and-forget for mousemove to reduce latency

Keyboard events:
- xdotool type (no --window) for printable characters
- xdotool key (no --window) for special keys and modifier combos
- Window focused once via _ensure_focus, not per-event

Diagnostics:
- Backend logs first 5 input events received per WebSocket session
- Backend logs xdotool stderr on failure
- Frontend logs first 10 input events sent + WS state warnings
- Frontend uses capture phase for keyboard events

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-15 01:22:59 -05:00

325 lines
10 KiB
Python

"""Display — WebSocket frame streaming + REST app management for GUI apps.
Pattern mirrors terminal.py: WebSocket attaches to a running GUI app,
receives JPEG frames, sends input events. REST endpoints manage app lifecycle.
"""
import asyncio
import logging
import os
import re
import shlex
import shutil
from pathlib import Path
from typing import Optional

from fastapi import APIRouter, Depends, HTTPException, Query, WebSocket, WebSocketDisconnect
from pydantic import BaseModel

from backend.auth import get_current_user, ws_authenticate
from backend.display import display_manager, HAS_DISPLAY_DEPS
# Every route declared in this module (REST and WebSocket) is registered on
# this router and therefore served under the /api/display prefix.
router = APIRouter(prefix="/api/display", tags=["display"])
# Module-level logger used by the handlers below.
log = logging.getLogger("atlus.display.router")
# ---------------------------------------------------------------------------
# Guards
# ---------------------------------------------------------------------------
def _require_deps():
    """Reject the current request when the display tooling is unavailable.

    Raises:
        HTTPException: with status 503 when ``HAS_DISPLAY_DEPS`` is falsy.
    """
    if HAS_DISPLAY_DEPS:
        return
    raise HTTPException(503, "Display dependencies not installed (Xvfb, xdotool, ImageMagick)")
# ---------------------------------------------------------------------------
# REST — app lifecycle
# ---------------------------------------------------------------------------
class AppLaunchRequest(BaseModel):
    """JSON body accepted by the ``POST /apps`` endpoint."""

    # Program to start; the launch endpoint also uses it to look up an
    # instance that is already running for the user.
    command: str
    # Title handed to the display manager; empty when the client omits it.
    title: str = ""
    # Extra arguments handed to the display manager (pydantic copies field
    # defaults per instance, so the list literal is not shared).
    args: list[str] = []
    # Requested capture rate, passed through to the display manager as-is.
    target_fps: int = 10
@router.get("/apps")
async def list_apps(user: str = Depends(get_current_user)):
    """Return the GUI apps currently running for the authenticated user.

    The response has the shape ``{"apps": [...]}``. HTTP 503 is raised by
    ``_require_deps`` when the display tooling is missing.
    """
    _require_deps()
    running = display_manager.list_apps(user)
    return {"apps": running}
@router.post("/apps")
async def launch_app(req: AppLaunchRequest, user: str = Depends(get_current_user)):
    """Launch a GUI application on the user's virtual display.

    If the user already has an instance of ``req.command`` running, that
    instance is returned instead of starting a second one.

    Returns:
        The ``to_dict()`` payload of the running (new or existing) app.

    Raises:
        HTTPException: 503 when display dependencies are missing; 400, 404
            or 500 when the display manager raises ``ValueError``,
            ``FileNotFoundError`` or ``RuntimeError`` respectively.
    """
    _require_deps()

    # Reuse an already-running instance of the same command.
    existing = display_manager.get_app_by_command(user, req.command)
    if existing:
        return existing.to_dict()

    # Only the launch call is guarded, so the status mapping below applies
    # to launch failures and nothing else.
    try:
        app = await display_manager.launch_app(
            user, req.command, req.title, req.args, req.target_fps,
        )
    except ValueError as e:
        raise HTTPException(400, str(e)) from e
    except FileNotFoundError as e:
        raise HTTPException(404, str(e)) from e
    except RuntimeError as e:
        # Server-side failure: keep the traceback in the log, the client
        # only receives the message.
        log.exception("Launching %r failed for %s", req.command, user)
        raise HTTPException(500, str(e)) from e
    return app.to_dict()
@router.delete("/apps/{app_id}")
async def close_app(app_id: str, user: str = Depends(get_current_user)):
    """Stop one of the user's running GUI applications.

    Returns ``{"ok": True}`` when the display manager reports success and
    raises HTTP 404 otherwise.
    """
    _require_deps()
    stopped = display_manager.close_app(user, app_id)
    if stopped:
        return {"ok": True}
    raise HTTPException(404, "App not found")
@router.post("/apps/{app_id}/restart")
async def restart_app(app_id: str, user: str = Depends(get_current_user)):
    """Restart a running GUI application (stop, short pause, relaunch).

    Returns:
        The ``to_dict()`` payload of the newly launched instance.

    Raises:
        HTTPException: 503 when display dependencies are missing, 404 when
            the app is unknown, 500 when the relaunch fails.
    """
    _require_deps()
    app = display_manager.get_app(user, app_id)
    if not app:
        raise HTTPException(404, "App not found")

    # Capture the launch parameters before the instance is torn down.
    command, title, target_fps = app.command, app.title, app.target_fps
    # NOTE(review): the extra arguments accepted by the launch endpoint are
    # not forwarded here, so an app started with args restarts without
    # them — confirm whether that is intended.

    display_manager.close_app(user, app_id)
    # Brief pause to let the old process fully exit before relaunching.
    await asyncio.sleep(1)

    try:
        new_app = await display_manager.launch_app(
            user, command, title, target_fps=target_fps,
        )
    except Exception as e:
        # The old instance is already gone at this point; log the traceback
        # so the failure can be diagnosed, and keep the cause chained.
        log.exception("Restart of %s (%r) failed for %s", app_id, command, user)
        raise HTTPException(500, f"Restart failed: {e}") from e
    return new_app.to_dict()
@router.get("/status")
async def display_status(user: str = Depends(get_current_user)):
    """Report display availability together with the user's running apps.

    This handler does not call ``_require_deps``, so it answers even when
    the display dependencies are missing.
    """
    running = display_manager.list_apps(user)
    return {"available": HAS_DISPLAY_DEPS, "apps": running}
# ---------------------------------------------------------------------------
# Desktop file discovery — find installed GUI apps
# ---------------------------------------------------------------------------
# Directories scanned for *.desktop files, in priority order: discover_apps
# keeps the first entry it finds for a given command.
_DESKTOP_DIRS = [
    Path("/usr/share/applications"),
    Path("/usr/local/share/applications"),
    Path(os.path.expanduser("~/.local/share/applications")),
    Path("/var/lib/flatpak/exports/share/applications"),
    Path(os.path.expanduser("~/.local/share/flatpak/exports/share/applications")),
    Path("/snap/applications"),
    # snapd publishes the desktop files of installed snaps here; appended
    # last so the priority of the existing locations is unchanged.
    Path("/var/lib/snapd/desktop/applications"),
]
# Lower-case category names that indicate a docked/GUI application.
# (No code visible in this part of the file reads this set.)
_GUI_CATEGORIES = {
    # Internet and communication
    "network", "webbrowser", "email", "chat", "instantmessaging",
    "filetransfer", "p2p", "remoteaccess",
    # Productivity and media
    "office", "graphics", "audio", "video", "viewer", "player", "game",
    # System tools
    "utility", "system", "settings", "monitor", "filesystem", "security",
    "accessibility",
}
# Commands never offered as launchable apps (shells, terminal emulators and
# a few system utilities).
_EXCLUDED_CMDS = {
    "bash", "sh", "zsh", "fish", "xterm", "xfce4-terminal", "gnome-terminal",
    "lxterminal", "urxvt", "alacritty", "kitty", "foot",
    "true", "false", "update-manager", "software-properties-gtk",
}

# Keys of the [Desktop Entry] group that the parser keeps.
_DESKTOP_ENTRY_KEYS = frozenset({
    "Name", "Exec", "Icon", "Type", "NoDisplay", "Hidden",
    "Categories", "Terminal", "Comment",
})

# Exec field codes such as %u, %U, %f, %F.
_FIELD_CODE_RE = re.compile(r"%[a-zA-Z]")

# A leading VAR=value token, as used after an `env` wrapper.
_ENV_ASSIGNMENT_RE = re.compile(r"[A-Za-z_][A-Za-z0-9_]*=")


def _read_desktop_entry(content: str) -> dict[str, str]:
    """Collect the keys of interest from the ``[Desktop Entry]`` group.

    Lines before the group header are ignored and parsing stops at the next
    group header. When a key occurs more than once the last value wins.
    """
    entry: dict[str, str] = {}
    in_desktop_entry = False
    for raw_line in content.splitlines():
        line = raw_line.strip()
        if line == "[Desktop Entry]":
            in_desktop_entry = True
            continue
        if line.startswith("[") and line.endswith("]"):
            if in_desktop_entry:
                break  # any other group header ends [Desktop Entry]
            continue
        if not in_desktop_entry or "=" not in line:
            continue
        key, _, value = line.partition("=")
        key = key.strip()
        if key in _DESKTOP_ENTRY_KEYS:
            entry[key] = value.strip()
    return entry


def _exec_command(exec_line: str) -> Optional[str]:
    """Return the basename of the program started by an ``Exec=`` value.

    Field codes are removed, quoting is honoured (so a quoted path that
    contains spaces stays one token) and a leading ``env VAR=value ...``
    wrapper is skipped. Returns None when no program name remains.
    """
    stripped = _FIELD_CODE_RE.sub("", exec_line).strip()
    try:
        tokens = shlex.split(stripped)
    except ValueError:
        # Unbalanced quotes: fall back to plain whitespace splitting.
        tokens = stripped.split()

    # `env FOO=bar prog ...` launches `prog`, not `env`.
    if tokens and os.path.basename(tokens[0]) == "env":
        tokens = tokens[1:]
        while tokens and _ENV_ASSIGNMENT_RE.match(tokens[0]):
            tokens = tokens[1:]

    if not tokens:
        return None
    return os.path.basename(tokens[0]) or None


def _parse_desktop_file(path: Path) -> Optional[dict]:
    """Parse a .desktop file and return app info, or None if not a GUI app.

    None is returned when the file cannot be read, is not
    ``Type=Application``, is flagged ``NoDisplay``/``Hidden``/``Terminal``,
    has no usable ``Exec`` command, names an excluded command, or names a
    command that ``shutil.which`` cannot find.

    Returns:
        A dict with the keys ``name``, ``command``, ``icon``, ``comment``,
        ``categories`` (sorted, lower-case) and ``desktop_file``.
    """
    try:
        # Read as UTF-8 explicitly instead of depending on the process
        # locale; undecodable bytes are replaced rather than raising.
        content = path.read_text(encoding="utf-8", errors="replace")
    except (OSError, ValueError):
        return None

    entry = _read_desktop_entry(content)

    if entry.get("Type") != "Application":
        return None
    # Skip hidden, no-display and terminal-only entries.
    for flag in ("NoDisplay", "Hidden", "Terminal"):
        if entry.get(flag, "").lower() == "true":
            return None

    exec_line = entry.get("Exec", "")
    if not exec_line:
        return None
    command = _exec_command(exec_line)
    if not command or command in _EXCLUDED_CMDS:
        return None
    # Only offer commands that can actually be resolved on PATH.
    if not shutil.which(command):
        return None

    categories = {c.strip().lower() for c in entry.get("Categories", "").split(";")}
    categories.discard("")

    return {
        "name": entry.get("Name", command),
        "command": command,
        "icon": entry.get("Icon", "🖥"),
        "comment": entry.get("Comment", ""),
        "categories": sorted(categories),
        "desktop_file": path.name,
    }
def _scan_desktop_dirs() -> list[dict]:
    """Scan ``_DESKTOP_DIRS`` synchronously and return the launchable apps.

    Apps are de-duplicated by command (the first entry found wins) and the
    result is sorted by name, case-insensitively.
    """
    found: dict[str, dict] = {}
    for directory in _DESKTOP_DIRS:
        try:
            if not directory.is_dir():
                continue
            # Sorted so the entry kept for a duplicated command does not
            # depend on filesystem enumeration order.
            desktop_files = sorted(directory.glob("*.desktop"))
        except OSError:
            # One unreadable directory must not fail the whole discovery.
            log.warning("Cannot scan %s for .desktop files", directory, exc_info=True)
            continue
        for desktop_file in desktop_files:
            info = _parse_desktop_file(desktop_file)
            if info and info["command"] not in found:
                found[info["command"]] = info
    return sorted(found.values(), key=lambda a: a["name"].lower())


@router.get("/discover-apps")
async def discover_apps(_user: str = Depends(get_current_user)):
    """Scan .desktop files for installed GUI applications.

    The scan performs blocking directory listings, file reads and PATH
    lookups, so it runs in a worker thread instead of on the event loop.

    Returns:
        A list of app-info dicts sorted alphabetically by name.
    """
    return await asyncio.to_thread(_scan_desktop_dirs)
# ---------------------------------------------------------------------------
# WebSocket — frame streaming + input
# ---------------------------------------------------------------------------
def _clamp_fps(value) -> Optional[int]:
    """Return *value* as an int clamped to 1..30, or None if not numeric."""
    try:
        fps = int(value)
    except (TypeError, ValueError, OverflowError):
        return None
    return max(1, min(30, fps))


@router.websocket("/ws")
async def display_ws(
    websocket: WebSocket,
    app_id: Optional[str] = Query(default=None),
):
    """Attach to a GUI app's window — receive JPEG frames, send input.

    Query params:
    - app_id: required — the GUI app to stream
    - token: auth token (handled by ws_authenticate)

    Server sends:
    - Binary: raw JPEG frame bytes
    - {"type": "meta", "app_id": "...", "title": "...", "command": "..."}
    - {"type": "closed", "data": "Application exited"}
    - {"type": "error", "data": "..."}

    Client sends:
    - {"type": "mouse", "action": "click|dblclick|move|scroll",
       "x": N, "y": N, "button": 1, "delta": N}
    - {"type": "key", "action": "press|release",
       "key": "a", "code": "KeyA", "modifiers": ["ctrl"]}
    - {"type": "set_fps", "fps": 15}

    The socket is closed with code 4000 when ``app_id`` is missing and with
    code 4004 when the app is not found for the user. Messages that are not
    JSON objects, and ``set_fps`` requests with a non-numeric ``fps``, are
    ignored instead of ending the session.
    """
    # NOTE(review): the handler accepts the socket unconditionally after this
    # call, so ws_authenticate presumably rejects bad credentials itself —
    # confirm against backend.auth.
    username = await ws_authenticate(websocket)
    await websocket.accept()

    if not app_id:
        await websocket.send_json({"type": "error", "data": "app_id required"})
        await websocket.close(code=4000)
        return

    app = display_manager.get_app(username, app_id)
    if not app:
        await websocket.send_json({"type": "error", "data": "App not found"})
        await websocket.close(code=4004)
        return

    # Send metadata
    await websocket.send_json({
        "type": "meta",
        "app_id": app.app_id,
        "title": app.title,
        "command": app.command,
    })

    # Send last captured frame for instant display on reconnect
    if app.last_frame:
        await websocket.send_bytes(app.last_frame)

    app.attach_ws(websocket)
    log.info("WebSocket attached to app %s (%s) for %s", app_id, app.command, username)

    input_count = 0
    try:
        while True:
            msg = await websocket.receive_json()
            if not isinstance(msg, dict):
                # Valid JSON that is not an object (list, number, ...):
                # there is nothing to dispatch on, so skip it.
                continue

            msg_type = msg.get("type")
            if msg_type in ("mouse", "key"):
                input_count += 1
                # Diagnostics: log only the first five input events.
                if input_count <= 5:
                    log.info("Input #%d from %s: %s %s", input_count, username,
                             msg_type, msg.get("action", msg.get("key", "")))
                await app.send_input(msg)
            elif msg_type == "set_fps":
                fps = _clamp_fps(msg.get("fps", 10))
                if fps is None:
                    log.warning("Ignoring invalid fps %r from %s", msg.get("fps"), username)
                else:
                    app.target_fps = fps
    except WebSocketDisconnect:
        pass
    except Exception:
        log.exception("Display WS error for %s/%s", username, app_id)
    finally:
        # Always detach so the app stops referencing a dead socket.
        app.detach_ws(websocket)
        log.info("WebSocket detached from app %s for %s", app_id, username)