Complete rewrite of input forwarding — fix mouse and keyboard events

Root causes fixed:
- _focused was declared as an annotated class attribute in the middle of
  the @dataclass body (among the methods), causing field ordering issues —
  moved up to a proper field(...) declaration alongside the other fields
- xdotool type --window WID is not supported on all versions — removed the
  --window flag; keystrokes now go to the currently focused window instead
- xdotool commands with --window may fail silently — switched to
  absolute coordinates (window is at 0,0 filling the display)
- All xdotool errors were silently swallowed — now logged with stderr

Mouse events:
- Use absolute mousemove + click (no --window) since window fills display
- Separate mousemove and click into two calls for reliability
- Fire-and-forget for mousemove to reduce latency

Keyboard events:
- xdotool type (no --window) for printable characters
- xdotool key (no --window) for special keys and modifier combos
- Window focused once via _ensure_focus, not per-event

Diagnostics:
- Backend logs first 5 input events received per WebSocket session
- Backend logs xdotool stderr on failure
- Frontend logs first 10 input events sent + WS state warnings
- Frontend uses capture phase for keyboard events

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
roberts 2026-03-15 01:22:59 -05:00
parent 8d2a228599
commit a5a7b01fd9
3 changed files with 107 additions and 83 deletions

View file

@ -97,6 +97,7 @@ class ManagedGuiApp:
_stderr_lines: list = field(default_factory=list, repr=False)
_websockets: list = field(default_factory=list, repr=False)
_streaming: bool = field(default=False, repr=False)
_focused: bool = field(default=False, repr=False)
@property
def alive(self) -> bool:
@ -365,21 +366,21 @@ class ManagedGuiApp:
return
env = self._display_env()
wid = str(self.window_id)
try:
# Activate (focus) the window
await self._xdotool("windowactivate", "--sync", wid, env=env)
# Move to origin
await self._xdotool("windowmove", "--sync", wid, "0", "0", env=env)
# Resize to fill display (Xvfb is 1280x1024)
await self._xdotool("windowsize", "--sync", wid, "1280", "1024", env=env)
# Focus the window for input
await self._xdotool("windowfocus", wid, env=env)
# Activate (focus) the window
await self._xdotool("windowactivate", "--sync", wid, env=env)
# Move to origin
await self._xdotool("windowmove", "--sync", wid, "0", "0", env=env)
# Resize to fill display (Xvfb is 1280x1024)
await self._xdotool("windowsize", "--sync", wid, "1280", "1024", env=env)
# Focus the window for input
rc = await self._xdotool("windowfocus", wid, env=env)
if rc == 0:
self._focused = True
# Some apps need a moment to redraw after resize
await asyncio.sleep(0.3)
log.debug("Maximized window %s to 1280x1024 for %s", wid, self.app_id)
except Exception as e:
log.debug("Failed to maximize window %s: %s", wid, e)
log.info("Window %s maximized and focused for %s", wid, self.app_id)
else:
log.warning("Window %s maximize completed but focus failed for %s", wid, self.app_id)
# Some apps need a moment to redraw after resize
await asyncio.sleep(0.3)
async def _xdotool_search(self, *args, env=None) -> Optional[int]:
"""Run xdotool search and return first window ID, or None."""
@ -400,15 +401,17 @@ class ManagedGuiApp:
# ---- Input forwarding ----
_focused: bool = False # tracks whether we've activated the window
async def _ensure_focus(self, env: dict):
"""Activate/focus the window once, then skip on subsequent calls."""
if not self._focused:
wid = str(self.window_id)
await self._xdotool("windowactivate", wid, env=env)
await self._xdotool("windowfocus", wid, env=env)
self._focused = True
rc1 = await self._xdotool("windowactivate", wid, env=env)
rc2 = await self._xdotool("windowfocus", wid, env=env)
if rc1 == 0 and rc2 == 0:
self._focused = True
log.info("Window %s focused for input (app %s)", wid, self.app_id)
else:
log.warning("Failed to focus window %s (activate=%s, focus=%s)", wid, rc1, rc2)
async def send_input(self, msg: dict):
"""Forward mouse/keyboard input to the X11 window."""
@ -423,42 +426,30 @@ class ManagedGuiApp:
elif msg_type == "key":
await self._handle_key(msg, env)
except Exception:
log.debug("Input forwarding error for %s", self.app_id, exc_info=True)
log.warning("Input forwarding error for %s", self.app_id, exc_info=True)
async def _handle_mouse(self, msg: dict, env: dict):
action = msg.get("action", "")
x, y = str(msg.get("x", 0)), str(msg.get("y", 0))
wid = str(self.window_id)
if action == "click":
await self._ensure_focus(env)
btn = str(msg.get("button", 1))
await self._xdotool(
"mousemove", "--window", wid, x, y,
"click", "--window", wid, btn,
env=env,
)
# Use absolute coordinates — window is at 0,0 filling display
await self._xdotool("mousemove", x, y, env=env)
await self._xdotool("click", btn, env=env)
elif action == "dblclick":
await self._ensure_focus(env)
btn = str(msg.get("button", 1))
await self._xdotool(
"mousemove", "--window", wid, x, y,
"click", "--window", wid, "--repeat", "2", btn,
env=env,
)
await self._xdotool("mousemove", x, y, env=env)
await self._xdotool("click", "--repeat", "2", btn, env=env)
elif action == "move":
await self._xdotool_fire(
"mousemove", "--window", wid, x, y,
env=env,
)
self._xdotool_fire("mousemove", x, y, env=env)
elif action == "scroll":
delta = msg.get("delta", 0)
btn = "4" if delta < 0 else "5" # X11: 4=up, 5=down
await self._xdotool(
"mousemove", "--window", wid, x, y,
"click", "--window", wid, btn,
env=env,
)
await self._xdotool("mousemove", x, y, env=env)
await self._xdotool("click", btn, env=env)
async def _handle_key(self, msg: dict, env: dict):
action = msg.get("action", "press")
@ -474,7 +465,6 @@ class ManagedGuiApp:
return
await self._ensure_focus(env)
wid = str(self.window_id)
# Determine which modifiers are held (excluding Shift for printable chars)
active_mods = []
@ -484,12 +474,9 @@ class ManagedGuiApp:
active_mods.append(xmod)
# Single printable character with no ctrl/alt/super modifiers → use xdotool type
# This handles all special characters (@, #, !, etc.) correctly
# xdotool type handles @, #, !, etc. correctly via keyboard simulation
if len(key) == 1 and not active_mods:
await self._xdotool(
"type", "--delay", "0", "--clearmodifiers", "--window", wid, key,
env=env,
)
await self._xdotool("type", "--delay", "0", "--clearmodifiers", key, env=env)
return
# Non-printable key or key combo with modifiers → use xdotool key
@ -507,39 +494,45 @@ class ManagedGuiApp:
if mod_parts:
xkey = "+".join(mod_parts) + "+" + xkey
await self._xdotool("key", "--window", wid, xkey, env=env)
await self._xdotool("key", xkey, env=env)
async def _xdotool(self, *args, env=None):
"""Run xdotool and wait for completion."""
proc = await asyncio.create_subprocess_exec(
XDOTOOL_BIN, *args,
stdout=asyncio.subprocess.PIPE,
stderr=asyncio.subprocess.PIPE,
env=env,
)
await asyncio.wait_for(proc.communicate(), timeout=5)
async def _xdotool_fire(self, *args, env=None):
"""Run xdotool without waiting — fire and forget for low-latency ops."""
proc = await asyncio.create_subprocess_exec(
XDOTOOL_BIN, *args,
stdout=asyncio.subprocess.DEVNULL,
stderr=asyncio.subprocess.DEVNULL,
env=env,
)
# Don't await — let it complete in background
asyncio.create_task(self._reap(proc))
@staticmethod
async def _reap(proc):
"""Reap a fire-and-forget subprocess to avoid zombies."""
async def _xdotool(self, *args, env=None) -> int:
"""Run xdotool and wait for completion. Returns exit code."""
try:
proc = await asyncio.create_subprocess_exec(
XDOTOOL_BIN, *args,
stdout=asyncio.subprocess.PIPE,
stderr=asyncio.subprocess.PIPE,
env=env,
)
stdout, stderr = await asyncio.wait_for(proc.communicate(), timeout=5)
if proc.returncode != 0:
err = stderr.decode(errors="replace").strip() if stderr else ""
log.warning("xdotool %s failed (rc=%d): %s", args[0] if args else "?", proc.returncode, err)
return proc.returncode
except asyncio.TimeoutError:
log.warning("xdotool %s timed out", args[0] if args else "?")
return -1
except Exception as e:
log.warning("xdotool %s exception: %s", args[0] if args else "?", e)
return -1
def _xdotool_fire(self, *args, env=None):
    """Start an xdotool invocation in the background and return immediately.

    The command is handed to ``_xdotool_fire_async`` and scheduled as a task
    on the running event loop; the caller does not wait for it and does not
    see its result.
    """
    background_run = self._xdotool_fire_async(*args, env=env)
    asyncio.create_task(background_run)
async def _xdotool_fire_async(self, *args, env=None):
"""Fire-and-forget xdotool execution."""
try:
proc = await asyncio.create_subprocess_exec(
XDOTOOL_BIN, *args,
stdout=asyncio.subprocess.DEVNULL,
stderr=asyncio.subprocess.DEVNULL,
env=env,
)
await asyncio.wait_for(proc.wait(), timeout=5)
except Exception:
try:
proc.kill()
except Exception:
pass
pass
# ---- Lifecycle ----

View file

@ -301,12 +301,17 @@ async def display_ws(
app.attach_ws(websocket)
log.info("WebSocket attached to app %s (%s) for %s", app_id, app.command, username)
_input_count = 0
try:
while True:
msg = await websocket.receive_json()
msg_type = msg.get("type")
if msg_type in ("mouse", "key"):
_input_count += 1
if _input_count <= 5:
log.info("Input #%d from %s: %s %s", _input_count, username,
msg_type, msg.get("action", msg.get("key", "")))
await app.send_input(msg)
elif msg_type == "set_fps":
fps = msg.get("fps", 10)

View file

@ -29,6 +29,8 @@
ws: null,
serverAppId: null, // backend app_id
status: 'connecting',
_lastMove: 0,
_inputCount: 0,
};
appState[guiConfig.id] = state;
@ -52,6 +54,8 @@
canvas.width = 800;
canvas.height = 600;
canvas.tabIndex = 0;
// Ensure canvas is focusable and captures all events
canvas.style.outline = 'none';
wrap.appendChild(canvas);
container.appendChild(wrap);
@ -124,6 +128,7 @@
state.ws = ws;
ws.onopen = () => {
console.log('[display] WebSocket connected for', state.config.command);
showStatus(state, 'connecting', 'Waiting for window…');
};
@ -135,13 +140,14 @@
// JSON message
try {
const msg = JSON.parse(e.data);
console.log('[display] WS message:', msg);
handleMessage(state, msg);
} catch (err) {}
}
};
ws.onclose = () => {
// Don't auto-reconnect — the app tab will re-init if reopened
ws.onclose = (e) => {
console.log('[display] WebSocket closed:', e.code, e.reason);
};
ws.onerror = () => {
@ -156,6 +162,7 @@
state.statusEl.style.display = 'none';
state.wrap.style.display = 'flex';
state.canvas.focus();
console.log('[display] First frame received — canvas visible, input active');
}
const blob = new Blob([buffer], { type: 'image/jpeg' });
@ -164,6 +171,7 @@
if (state.canvas.width !== img.width || state.canvas.height !== img.height) {
state.canvas.width = img.width;
state.canvas.height = img.height;
console.log('[display] Canvas resized to', img.width, 'x', img.height);
}
state.ctx.drawImage(img, 0, 0);
URL.revokeObjectURL(img.src);
@ -215,6 +223,15 @@
function send(msg) {
if (state.ws && state.ws.readyState === WebSocket.OPEN) {
state.ws.send(JSON.stringify(msg));
// Log first few events for debugging
state._inputCount++;
if (state._inputCount <= 10) {
console.log('[display] Input sent:', msg.type, msg.action || msg.key || '');
} else if (state._inputCount === 11) {
console.log('[display] (suppressing further input logs)');
}
} else {
console.warn('[display] Cannot send input — WS not open, state:', state.ws?.readyState);
}
}
@ -229,33 +246,42 @@
// Mouse events
canvas.addEventListener('mousedown', (e) => {
e.preventDefault();
e.stopPropagation();
canvas.focus();
const coords = scaleCoords(e);
send({ type: 'mouse', action: 'click', ...coords, button: e.button + 1 });
});
canvas.addEventListener('dblclick', (e) => {
e.preventDefault();
e.stopPropagation();
const coords = scaleCoords(e);
send({ type: 'mouse', action: 'dblclick', ...coords, button: e.button + 1 });
});
canvas.addEventListener('mousemove', (e) => {
// Throttle mousemove to ~30fps
if (state._lastMove && Date.now() - state._lastMove < 33) return;
state._lastMove = Date.now();
const now = Date.now();
if (now - state._lastMove < 33) return;
state._lastMove = now;
const coords = scaleCoords(e);
send({ type: 'mouse', action: 'move', ...coords });
});
canvas.addEventListener('wheel', (e) => {
e.preventDefault();
e.stopPropagation();
const coords = scaleCoords(e);
send({ type: 'mouse', action: 'scroll', ...coords, delta: e.deltaY > 0 ? 1 : -1 });
}, { passive: false });
canvas.addEventListener('contextmenu', (e) => e.preventDefault());
canvas.addEventListener('contextmenu', (e) => {
e.preventDefault();
e.stopPropagation();
});
// Keyboard events
// Keyboard events — capture phase to intercept before Atlus shell
canvas.addEventListener('keydown', (e) => {
e.preventDefault();
e.stopPropagation();
@ -264,7 +290,7 @@
key: e.key, code: e.code,
modifiers: getModifiers(e),
});
});
}, true); // capture phase
canvas.addEventListener('keyup', (e) => {
e.preventDefault();
@ -274,7 +300,7 @@
key: e.key, code: e.code,
modifiers: getModifiers(e),
});
});
}, true); // capture phase
}
// ---- App registration (no left dock — apps live in the right panel) ----