# matrix-ai-agent/test_element_call.py

"""Playwright test: Element Call with matrix-ai-agent bot.

Usage:
    python3 test_element_call.py [--headless]

Logs in as testbot-playwright, creates DM with bot, starts Element Call,
uses fake microphone audio, monitors bot logs for VAD/speech events.
"""
import argparse
import asyncio
import os
import subprocess
import sys
import time

from playwright.async_api import async_playwright

# Test config
#
# Every value can be overridden through an environment variable of the form
# E2E_<NAME>, so credentials and endpoints do not have to be edited in the
# source. The defaults are the values this script has always used.
# NOTE(review): the default password is still committed here for backward
# compatibility — prefer supplying E2E_TEST_PASSWORD and rotating this one.
ELEMENT_URL = os.environ.get("E2E_ELEMENT_URL", "https://element.agiliton.eu")
TEST_USER = os.environ.get("E2E_TEST_USER", "@testbot-playwright:agiliton.eu")
# Local part of TEST_USER; this is what gets typed into the login form.
TEST_USER_LOCAL = os.environ.get("E2E_TEST_USER_LOCAL", "testbot-playwright")
TEST_PASSWORD = os.environ.get("E2E_TEST_PASSWORD", "TestP@ssw0rd-1771760269")
BOT_USER = os.environ.get("E2E_BOT_USER", "@ai:agiliton.eu")
HOMESERVER = os.environ.get("E2E_HOMESERVER", "https://matrix.agiliton.eu")


def _read_bot_log_tail(lines: int = 50) -> str:
    """Return the last *lines* lines of the bot container log, fetched over SSH.

    This call blocks for up to 15 seconds, so async callers should run it in
    an executor. An SSH timeout or a failure to start ``ssh`` is reported on
    stdout and yields an empty string, so a transient failure reads as
    "keyword not seen yet" instead of aborting the caller.
    """
    try:
        result = subprocess.run(
            ["ssh", "root@matrix.agiliton.internal",
             f"cd /opt/matrix-ai-agent && docker compose logs bot --tail={lines} 2>&1"],
            capture_output=True, text=True, errors="replace", timeout=15
        )
    except (subprocess.TimeoutExpired, OSError) as exc:
        print(f"  [logs] could not read bot logs: {exc}")
        return ""
    return result.stdout


async def wait_for_bot_event(keyword: str, timeout: int = 60) -> bool:
    """Poll bot container logs for a specific keyword.

    Args:
        keyword: Substring to look for in the most recent bot log lines.
        timeout: Maximum number of seconds to keep polling.

    Returns:
        True as soon as *keyword* appears in the fetched log tail, False if
        the deadline passes first (including when *timeout* is zero or
        negative, in which case the logs are never fetched).
    """
    loop = asyncio.get_running_loop()
    # Monotonic clock: the deadline must not move if the wall clock is adjusted.
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        # The SSH round-trip is blocking; run it in the default executor so
        # the event loop (and with it the Playwright session) stays responsive.
        log_tail = await loop.run_in_executor(None, _read_bot_log_tail)
        if keyword in log_tail:
            return True
        await asyncio.sleep(2)
    return False


def _log_browser_console(msg) -> None:
    """Echo browser console errors and warnings to stdout."""
    # Playwright reports warnings with type "warning"; "warn" is kept so the
    # filter still accepts what the script originally looked for.
    if msg.type in ("error", "warning", "warn"):
        print(f"  [browser] {msg.type}: {msg.text}")


def _fetch_bot_logs_for_report(tail: int = 100) -> str:
    """Return the last *tail* bot log lines over SSH, or "" if the fetch fails.

    Blocking (up to 15 seconds); call it through an executor from async code.
    """
    try:
        result = subprocess.run(
            ["ssh", "root@matrix.agiliton.internal",
             f"cd /opt/matrix-ai-agent && docker compose logs bot --tail={tail} 2>&1"],
            capture_output=True, text=True, errors="replace", timeout=15
        )
    except (subprocess.TimeoutExpired, OSError) as exc:
        print(f"  Could not fetch bot logs: {exc}")
        return ""
    return result.stdout


async def _click_if_present(page, selector: str, timeout: int) -> bool:
    """Best-effort click: return True if the click succeeded, False otherwise."""
    try:
        await page.click(selector, timeout=timeout)
    except Exception:
        # Deliberately broad: these elements are optional and vary between
        # Element versions, so a missing one must not fail the run.
        return False
    return True


async def _login(page) -> None:
    """Steps 1-2: open Element, sign in as the test user, dismiss prompts."""
    print(f"[1] Navigating to {ELEMENT_URL}...")
    await page.goto(ELEMENT_URL, wait_until="networkidle", timeout=30000)
    await page.screenshot(path="/tmp/element-01-loaded.png")

    # Handle "Continue" button if shown (welcome screen)
    await _click_if_present(page, "text=Continue", timeout=3000)

    print("[2] Logging in...")
    # Click Sign In button if present
    await _click_if_present(page, "text=Sign in", timeout=5000)

    username_selector = "input[type='text'], input[id='mx_LoginForm_username']"
    await page.wait_for_selector(username_selector, timeout=15000)
    await page.screenshot(path="/tmp/element-02-login.png")

    await page.locator(username_selector).first.fill(TEST_USER_LOCAL)
    await page.locator("input[type='password']").first.fill(TEST_PASSWORD)

    # Submit, then give the client a fixed 5 s to finish logging in.
    await page.keyboard.press("Enter")
    await page.wait_for_timeout(5000)
    await page.screenshot(path="/tmp/element-03-after-login.png")

    # Handle "Use without" / skip verification prompts
    for skip_text in ["Use without", "Skip", "I'll verify later", "Continue"]:
        if await _click_if_present(page, f"text={skip_text}", timeout=2000):
            await page.wait_for_timeout(1000)

    await page.screenshot(path="/tmp/element-04-home.png")


async def _open_dm_with_bot(page) -> None:
    """Step 3: open the new-DM dialog and start a direct chat with the bot."""
    print("[3] Creating DM with bot...")
    start_chat = "[aria-label='Start chat'], [title='Start chat'], button:has-text('Start')"
    add_room = "[aria-label='Add room'], .mx_RoomList_headerButtons button"
    # Try the start-chat button, then the "+" button, then direct navigation.
    if not await _click_if_present(page, start_chat, timeout=5000):
        if not await _click_if_present(page, add_room, timeout=5000):
            print("  Could not find DM button, trying navigation...")
            await page.goto(f"{ELEMENT_URL}/#/new", timeout=10000)

    await page.wait_for_timeout(2000)
    await page.screenshot(path="/tmp/element-05-dm-dialog.png")

    # Search for the bot user, pick the result and confirm. A failure here is
    # reported but not fatal, so the following steps still capture screenshots.
    try:
        dm_input = page.locator("input[type='text']").first
        await dm_input.fill(BOT_USER)
        await page.wait_for_timeout(2000)
        await page.click(f"text={BOT_USER}", timeout=5000)
        await page.wait_for_timeout(1000)
        await page.click("button:has-text('Go'), button:has-text('OK'), button:has-text('Direct Message')", timeout=5000)
    except Exception as e:
        print(f"  DM creation error: {e}")

    await page.wait_for_timeout(3000)
    await page.screenshot(path="/tmp/element-06-room.png")


async def _start_call(page) -> None:
    """Step 4: click the video call button in the room header."""
    print("[4] Looking for call button...")
    try:
        await page.click("[aria-label='Video call'], [title='Video call'], button.mx_LegacyCallButton", timeout=10000)
        print("  Clicked video call button")
    except Exception as e:
        print(f"  Could not find call button: {e}")
        # Fall back to matching the button by its visible text.
        await _click_if_present(page, "text=Video call", timeout=5000)

    await page.wait_for_timeout(5000)
    await page.screenshot(path="/tmp/element-07-call-started.png")


async def _check_voice_pipeline(page) -> None:
    """Steps 5-7: watch the bot logs for join, VAD and transcription events."""
    print("[5] Waiting for bot to join (60s)...")
    bot_joined = await wait_for_bot_event("Connected", timeout=60)
    if bot_joined:
        print("  ✓ Bot joined the call!")
    else:
        print("  ✗ Bot did not join within 60s")

    print("[6] Fake microphone is active — waiting for VAD events (30s)...")
    await page.wait_for_timeout(10000)  # let call run for 10s
    await page.screenshot(path="/tmp/element-08-in-call.png")

    vad_triggered = await wait_for_bot_event("VAD: user_state=", timeout=20)
    if vad_triggered:
        print("  ✓ VAD triggered! Audio pipeline works, E2EE decryption successful.")
    else:
        print("  ✗ VAD did not trigger — either E2EE blocks audio or pipeline issue")

    speech_transcribed = await wait_for_bot_event("USER_SPEECH:", timeout=30)
    if speech_transcribed:
        print("  ✓ Speech transcribed! Full pipeline working.")
    else:
        print("  ✗ No speech transcription")

    print("[7] Checking E2EE state in logs...")
    keywords = ("E2EE_STATE", "VAD", "USER_SPEECH", "AGENT_SPEECH",
                "DEC_FAILED", "MISSING_KEY", "shared_key", "HKDF")
    # The SSH fetch blocks; keep it off the event loop thread.
    loop = asyncio.get_running_loop()
    logs = await loop.run_in_executor(None, _fetch_bot_logs_for_report)
    for line in logs.split("\n"):
        if any(kw in line for kw in keywords):
            print(f"  LOG: {line.strip()}")


async def run_test(headless: bool = True) -> None:
    """Run the Element Call end-to-end scenario against the bot.

    Launches Chromium with fake media devices, logs in as the test user,
    opens a DM with the bot, starts a video call and then watches the bot
    container logs for join, VAD and speech events. Progress and results are
    printed; screenshots of each stage are written to /tmp/element-*.png.

    Args:
        headless: Run the browser without a visible window when True.
    """
    async with async_playwright() as p:
        # Launch with fake audio device so VAD can trigger
        browser = await p.chromium.launch(
            headless=headless,
            args=[
                "--use-fake-ui-for-media-stream",
                "--use-fake-device-for-media-stream",
                "--allow-running-insecure-content",
                "--disable-web-security",
                "--no-sandbox",
            ]
        )
        try:
            # Grant media permissions automatically
            context = await browser.new_context(
                permissions=["microphone", "camera"],
            )
            page = await context.new_page()

            # Capture console logs
            page.on("console", _log_browser_console)

            await _login(page)
            await _open_dm_with_bot(page)
            await _start_call(page)
            await _check_voice_pipeline(page)

            await page.wait_for_timeout(5000)
            await page.screenshot(path="/tmp/element-09-final.png")

            print("\nScreenshots saved to /tmp/element-*.png")
        finally:
            # Close the browser even when a step above raised.
            await browser.close()


if __name__ == "__main__":
    # Script entry point: the browser window is shown unless --headless is
    # passed on the command line.
    cli = argparse.ArgumentParser()
    cli.add_argument("--headless", help="Run headless", action="store_true")
    options = cli.parse_args()
    scenario = run_test(headless=options.headless)
    asyncio.run(scenario)