atlus/backend/routers/display.py
roberts a73b515258 Add native GUI app support via per-window frame streaming
Each configured GUI app (e.g. Nextcloud) gets its own dock icon and
opens as a regular Atlus tab. Under the hood: Xvfb virtual display,
ImageMagick captures individual window pixmaps as JPEG, streams over
WebSocket to a canvas element, with xdotool forwarding mouse/keyboard
input back to the X11 window. Apps persist in background when tab is
closed, and streaming pauses when no viewers are attached.

New files: backend/display.py (DisplayManager + ManagedGuiApp),
backend/routers/display.py (WebSocket + REST), frontend display.js/css.
Config: gui_apps array in settings for registered applications.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-14 23:09:15 -05:00

161 lines
5.1 KiB
Python

"""Display — WebSocket frame streaming + REST app management for GUI apps.
Pattern mirrors terminal.py: WebSocket attaches to a running GUI app,
receives JPEG frames, sends input events. REST endpoints manage app lifecycle.
"""
import logging
from typing import Optional
from fastapi import APIRouter, Depends, HTTPException, Query, WebSocket, WebSocketDisconnect
from pydantic import BaseModel
from backend.auth import get_current_user, ws_authenticate
from backend.display import display_manager, HAS_DISPLAY_DEPS
router = APIRouter(prefix="/api/display", tags=["display"])
log = logging.getLogger("atlus.display.router")
# ---------------------------------------------------------------------------
# Guards
# ---------------------------------------------------------------------------
def _require_deps():
    """Reject the request when the display tooling is not installed.

    Raises:
        HTTPException: status 503 if ``HAS_DISPLAY_DEPS`` is falsy.
    """
    if HAS_DISPLAY_DEPS:
        return
    raise HTTPException(503, "Display dependencies not installed (Xvfb, xdotool, ImageMagick)")
# ---------------------------------------------------------------------------
# REST — app lifecycle
# ---------------------------------------------------------------------------
class AppLaunchRequest(BaseModel):
    """Request body accepted by ``POST /api/display/apps``."""

    # Command to launch; also the key used to look up an already-running
    # instance for the user before a new one is started.
    command: str

    # Title handed to the display manager; empty when the client omits it.
    title: str = ""

    # Extra arguments passed to the display manager together with the command.
    args: list[str] = []

    # Frame rate forwarded to the display manager at launch.
    target_fps: int = 10
@router.get("/apps")
async def list_apps(user: str = Depends(get_current_user)):
    """Return the GUI apps the display manager reports for the current user.

    The response has the shape ``{"apps": ...}``. Raises HTTP 503 (through
    ``_require_deps``) when the display dependencies are missing.
    """
    _require_deps()
    running = display_manager.list_apps(user)
    return {"apps": running}
@router.post("/apps")
async def launch_app(req: AppLaunchRequest, user: str = Depends(get_current_user)):
    """Launch a GUI application on the user's virtual display.

    If the display manager already tracks an app for this user with the same
    ``command``, that app is returned instead of starting a second instance.
    Only the command is compared; ``args`` do not take part in the lookup.

    Returns:
        The ``to_dict()`` representation of the existing or newly launched app.

    Raises:
        HTTPException: 503 when display dependencies are missing; 400, 404 or
            500 when launching raises ``ValueError``, ``FileNotFoundError`` or
            ``RuntimeError`` respectively. The original exception is chained
            as the cause so it stays visible in server-side tracebacks.
    """
    _require_deps()

    # Reuse a running instance rather than spawning a duplicate.
    existing = display_manager.get_app_by_command(user, req.command)
    if existing:
        return existing.to_dict()

    try:
        app = await display_manager.launch_app(
            user, req.command, req.title, req.args, req.target_fps,
        )
        return app.to_dict()
    except ValueError as e:
        raise HTTPException(400, str(e)) from e
    except FileNotFoundError as e:
        raise HTTPException(404, str(e)) from e
    except RuntimeError as e:
        raise HTTPException(500, str(e)) from e
@router.delete("/apps/{app_id}")
async def close_app(app_id: str, user: str = Depends(get_current_user)):
    """Stop one of the current user's running GUI applications.

    Returns ``{"ok": True}`` when the display manager reports success.

    Raises:
        HTTPException: 503 when display dependencies are missing, 404 when the
            display manager's ``close_app`` returns a falsy result.
    """
    _require_deps()
    closed = display_manager.close_app(user, app_id)
    if closed:
        return {"ok": True}
    raise HTTPException(404, "App not found")
@router.get("/status")
async def display_status(user: str = Depends(get_current_user)):
    """Report display availability together with the user's app list.

    This endpoint does not call ``_require_deps``, so it still answers when
    the dependencies are missing; ``available`` carries ``HAS_DISPLAY_DEPS``.
    """
    available = HAS_DISPLAY_DEPS
    apps = display_manager.list_apps(user)
    return {"available": available, "apps": apps}
# ---------------------------------------------------------------------------
# WebSocket — frame streaming + input
# ---------------------------------------------------------------------------
@router.websocket("/ws")
async def display_ws(
    websocket: WebSocket,
    app_id: Optional[str] = Query(default=None),
):
    """Attach to a GUI app's window — receive JPEG frames, send input.

    Query params:
      - app_id: required — the GUI app to stream
      - token: auth token (handled by ws_authenticate)

    Server sends:
      - Binary: raw JPEG frame bytes
      - {"type": "meta", "app_id": "...", "title": "...", "command": "..."}
      - {"type": "closed", "data": "Application exited"}
      - {"type": "error", "data": "..."}

    Client sends:
      - {"type": "mouse", "action": "click|dblclick|move|scroll",
         "x": N, "y": N, "button": 1, "delta": N}
      - {"type": "key", "action": "press|release",
         "key": "a", "code": "KeyA", "modifiers": ["ctrl"]}
      - {"type": "set_fps", "fps": 15}

    Close codes used here: 4000 when ``app_id`` is missing, 4004 when the app
    cannot be found for the authenticated user. Messages that are not JSON
    objects, unknown message types and ``set_fps`` requests with a non-numeric
    ``fps`` are ignored rather than terminating the stream.
    """
    # NOTE(review): assumes ws_authenticate itself rejects the connection
    # (raises or closes) on auth failure — confirm against backend.auth.
    username = await ws_authenticate(websocket)
    await websocket.accept()

    if not app_id:
        await websocket.send_json({"type": "error", "data": "app_id required"})
        await websocket.close(code=4000)
        return

    app = display_manager.get_app(username, app_id)
    if not app:
        await websocket.send_json({"type": "error", "data": "App not found"})
        await websocket.close(code=4004)
        return

    # Send metadata
    await websocket.send_json({
        "type": "meta",
        "app_id": app.app_id,
        "title": app.title,
        "command": app.command,
    })

    # Send last captured frame for instant display on reconnect
    if app.last_frame:
        await websocket.send_bytes(app.last_frame)

    app.attach_ws(websocket)
    log.info("WebSocket attached to app %s (%s) for %s", app_id, app.command, username)

    try:
        while True:
            msg = await websocket.receive_json()
            if not isinstance(msg, dict):
                # Valid JSON that is not an object (list, number, ...) has no
                # "type" to dispatch on; skip it instead of failing on .get().
                continue

            msg_type = msg.get("type")
            if msg_type in ("mouse", "key"):
                await app.send_input(msg)
            elif msg_type == "set_fps":
                # A missing "fps" key falls back to 10, as before; an invalid
                # value leaves the current rate untouched.
                fps = _coerce_fps(msg.get("fps", 10))
                if fps is not None:
                    app.target_fps = fps
    except WebSocketDisconnect:
        pass
    except Exception:
        log.exception("Display WS error for %s/%s", username, app_id)
    finally:
        # Always detach so the app does not keep a reference to a dead socket.
        app.detach_ws(websocket)
        log.info("WebSocket detached from app %s for %s", app_id, username)


def _coerce_fps(raw):
    """Convert a client-supplied FPS value to an int clamped to 1..30.

    Returns ``None`` when *raw* is not a usable number (wrong type, bool,
    NaN or infinity) so the caller can ignore the request.
    """
    # bool is a subclass of int; treat True/False as invalid rather than 1/0.
    if isinstance(raw, bool) or not isinstance(raw, (int, float)):
        return None
    try:
        fps = int(raw)
    except (ValueError, OverflowError):
        # int() raises ValueError for NaN and OverflowError for +/-infinity.
        return None
    return max(1, min(30, fps))