fix: handle Element X MSC4143 v2 encryption key format (memberships array)
Element X embeds E2EE keys inside memberships[].encryption_keys, not at the top level of the call.member state event content. Bot was only checking content.encryption_keys, so it never found the caller's key — causing 'Warten auf Medien' (waiting for media) because encrypted audio couldn't be decrypted. - Added _extract_enc_keys_from_content() helper handling both formats - Updated on_unknown handler, VoiceSession creation, and key fetch - Bot now publishes keys in both formats for compatibility - Updated voice.py state fetch to check memberships[] fallback Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
89
bot.py
89
bot.py
@@ -51,6 +51,32 @@ from cross_signing import CrossSigningManager
|
|||||||
BOT_DEVICE_ID = "AIBOT"
|
BOT_DEVICE_ID = "AIBOT"
|
||||||
CALL_MEMBER_TYPE = "org.matrix.msc3401.call.member"
|
CALL_MEMBER_TYPE = "org.matrix.msc3401.call.member"
|
||||||
ENCRYPTION_KEYS_TYPE = "io.element.call.encryption_keys"
|
ENCRYPTION_KEYS_TYPE = "io.element.call.encryption_keys"
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_enc_keys_from_content(content: dict, skip_device: str = "") -> list[dict]:
|
||||||
|
"""Extract encryption_keys from call.member content, handling both formats.
|
||||||
|
|
||||||
|
Format 1 (legacy): content.encryption_keys = [{key, index}, ...]
|
||||||
|
Format 2 (MSC4143 v2 / Element X): content.memberships[].encryption_keys = [{key, index}, ...]
|
||||||
|
|
||||||
|
Returns list of {key, index, device_id} dicts.
|
||||||
|
"""
|
||||||
|
results = []
|
||||||
|
# Format 1: top-level encryption_keys
|
||||||
|
top_keys = content.get("encryption_keys", content.get("keys", []))
|
||||||
|
if top_keys:
|
||||||
|
device = content.get("device_id", "")
|
||||||
|
if device != skip_device:
|
||||||
|
for k in top_keys:
|
||||||
|
results.append({"key": k.get("key", ""), "index": k.get("index", 0), "device_id": device})
|
||||||
|
# Format 2: memberships array (Element X / MSC4143 v2)
|
||||||
|
for m in content.get("memberships", []):
|
||||||
|
device = m.get("device_id", "")
|
||||||
|
if device == skip_device:
|
||||||
|
continue
|
||||||
|
for k in m.get("encryption_keys", []):
|
||||||
|
results.append({"key": k.get("key", ""), "index": k.get("index", 0), "device_id": device})
|
||||||
|
return [r for r in results if r["key"]]
|
||||||
MODEL_STATE_TYPE = "ai.agiliton.model"
|
MODEL_STATE_TYPE = "ai.agiliton.model"
|
||||||
RENAME_STATE_TYPE = "ai.agiliton.auto_rename"
|
RENAME_STATE_TYPE = "ai.agiliton.auto_rename"
|
||||||
|
|
||||||
@@ -1564,21 +1590,20 @@ class Bot:
|
|||||||
|
|
||||||
# MSC4143: encryption keys may be embedded in call.member state events
|
# MSC4143: encryption keys may be embedded in call.member state events
|
||||||
# Check for keys BEFORE the early return for active calls
|
# Check for keys BEFORE the early return for active calls
|
||||||
enc_keys = content.get("encryption_keys", content.get("keys", []))
|
# Handles both top-level encryption_keys and memberships[].encryption_keys (Element X)
|
||||||
if enc_keys and room_id in self.voice_sessions:
|
enc_key_entries = _extract_enc_keys_from_content(content, skip_device=BOT_DEVICE_ID)
|
||||||
device_id = content.get("device_id", "")
|
if enc_key_entries and room_id in self.voice_sessions:
|
||||||
logger.info("Found encryption_keys in call.member event from %s (device=%s, keys=%d)",
|
logger.info("Found %d encryption key(s) in call.member event from %s",
|
||||||
event.sender, device_id, len(enc_keys))
|
len(enc_key_entries), event.sender)
|
||||||
vs = self.voice_sessions[room_id]
|
vs = self.voice_sessions[room_id]
|
||||||
import base64 as b64
|
import base64 as b64
|
||||||
for k in enc_keys:
|
for entry in enc_key_entries:
|
||||||
key_b64 = k.get("key", "")
|
key_b64 = entry["key"]
|
||||||
key_index = k.get("index", 0)
|
|
||||||
if key_b64:
|
|
||||||
key_b64 += "=" * (-len(key_b64) % 4)
|
key_b64 += "=" * (-len(key_b64) % 4)
|
||||||
key_bytes = b64.urlsafe_b64decode(key_b64)
|
key_bytes = b64.urlsafe_b64decode(key_b64)
|
||||||
vs.on_encryption_key(event.sender, device_id, key_bytes, key_index)
|
vs.on_encryption_key(event.sender, entry["device_id"], key_bytes, entry["index"])
|
||||||
logger.info("Delivered call.member embedded key (index=%d) to voice session", key_index)
|
logger.info("Delivered call.member embedded key (index=%d, device=%s) to voice session",
|
||||||
|
entry["index"], entry["device_id"])
|
||||||
|
|
||||||
if room_id in self.active_calls:
|
if room_id in self.active_calls:
|
||||||
return
|
return
|
||||||
@@ -1621,7 +1646,15 @@ class Bot:
|
|||||||
},
|
},
|
||||||
"foci_preferred": foci,
|
"foci_preferred": foci,
|
||||||
"m.call.intent": "audio",
|
"m.call.intent": "audio",
|
||||||
|
# Publish keys in both formats for compatibility
|
||||||
"encryption_keys": [{"index": 0, "key": bot_key_b64}],
|
"encryption_keys": [{"index": 0, "key": bot_key_b64}],
|
||||||
|
"memberships": [{
|
||||||
|
"device_id": BOT_DEVICE_ID,
|
||||||
|
"encryption_keys": [{"index": 0, "key": bot_key_b64}],
|
||||||
|
"expires": 7200000,
|
||||||
|
"foci_preferred": foci,
|
||||||
|
"focus_active": {"type": "livekit", "focus_selection": "oldest_membership"},
|
||||||
|
}],
|
||||||
}
|
}
|
||||||
|
|
||||||
state_key = f"_{BOT_USER}_{BOT_DEVICE_ID}_m.call"
|
state_key = f"_{BOT_USER}_{BOT_DEVICE_ID}_m.call"
|
||||||
@@ -1670,18 +1703,18 @@ class Bot:
|
|||||||
|
|
||||||
# Check for caller's key: first in call_member event (MSC4143),
|
# Check for caller's key: first in call_member event (MSC4143),
|
||||||
# then fall back to timeline scan (legacy io.element.call.encryption_keys)
|
# then fall back to timeline scan (legacy io.element.call.encryption_keys)
|
||||||
|
# Handles both top-level and memberships[] format (Element X)
|
||||||
caller_key = None
|
caller_key = None
|
||||||
caller_enc_keys = content.get("encryption_keys", content.get("keys", []))
|
caller_enc_entries = _extract_enc_keys_from_content(content, skip_device=BOT_DEVICE_ID)
|
||||||
if caller_enc_keys:
|
if caller_enc_entries:
|
||||||
import base64 as b64
|
import base64 as b64
|
||||||
for k in caller_enc_keys:
|
for entry in caller_enc_entries:
|
||||||
key_b64 = k.get("key", "")
|
key_b64 = entry["key"]
|
||||||
key_index = k.get("index", 0)
|
|
||||||
if key_b64:
|
|
||||||
key_b64 += "=" * (-len(key_b64) % 4)
|
key_b64 += "=" * (-len(key_b64) % 4)
|
||||||
caller_key = b64.urlsafe_b64decode(key_b64)
|
caller_key = b64.urlsafe_b64decode(key_b64)
|
||||||
vs.on_encryption_key(event.sender, caller_device_id, caller_key, key_index)
|
vs.on_encryption_key(event.sender, entry["device_id"], caller_key, entry["index"])
|
||||||
logger.info("Got caller E2EE key from call.member event (index=%d)", key_index)
|
logger.info("Got caller E2EE key from call.member event (index=%d, device=%s)",
|
||||||
|
entry["index"], entry["device_id"])
|
||||||
|
|
||||||
if not caller_key:
|
if not caller_key:
|
||||||
caller_key = await self._get_call_encryption_key(room_id, event.sender, caller_device_id)
|
caller_key = await self._get_call_encryption_key(room_id, event.sender, caller_device_id)
|
||||||
@@ -3537,6 +3570,7 @@ class Bot:
|
|||||||
token = self.client.access_token
|
token = self.client.access_token
|
||||||
|
|
||||||
# MSC4143: Check call.member state events for embedded encryption_keys
|
# MSC4143: Check call.member state events for embedded encryption_keys
|
||||||
|
# Handles both top-level and memberships[] format (Element X)
|
||||||
state_url = f"{HOMESERVER}/_matrix/client/v3/rooms/{room_id}/state"
|
state_url = f"{HOMESERVER}/_matrix/client/v3/rooms/{room_id}/state"
|
||||||
async with httpx.AsyncClient(timeout=10.0) as http:
|
async with httpx.AsyncClient(timeout=10.0) as http:
|
||||||
resp = await http.get(state_url, headers={"Authorization": f"Bearer {token}"})
|
resp = await http.get(state_url, headers={"Authorization": f"Bearer {token}"})
|
||||||
@@ -3549,17 +3583,16 @@ class Bot:
|
|||||||
if evt_sender == BOT_USER:
|
if evt_sender == BOT_USER:
|
||||||
continue
|
continue
|
||||||
content = evt.get("content", {})
|
content = evt.get("content", {})
|
||||||
enc_keys = content.get("encryption_keys", [])
|
entries = _extract_enc_keys_from_content(content, skip_device=BOT_DEVICE_ID)
|
||||||
if enc_keys:
|
if entries:
|
||||||
device = content.get("device_id", "")
|
logger.info("Found %d encryption key(s) in call.member state from %s",
|
||||||
logger.info("Found encryption_keys in call.member state from %s (device=%s, keys=%d)",
|
len(entries), evt_sender)
|
||||||
evt_sender, device, len(enc_keys))
|
entry = entries[0]
|
||||||
for k in enc_keys:
|
key_b64 = entry["key"]
|
||||||
key_b64 = k.get("key", "")
|
|
||||||
if key_b64:
|
|
||||||
key_b64 += "=" * (-len(key_b64) % 4)
|
key_b64 += "=" * (-len(key_b64) % 4)
|
||||||
key = base64.urlsafe_b64decode(key_b64)
|
key = base64.urlsafe_b64decode(key_b64)
|
||||||
logger.info("Got E2EE key from call.member state (%s, %d bytes)", evt_sender, len(key))
|
logger.info("Got E2EE key from call.member state (%s, device=%s, %d bytes)",
|
||||||
|
evt_sender, entry["device_id"], len(key))
|
||||||
return key
|
return key
|
||||||
|
|
||||||
# Legacy: scan timeline for io.element.call.encryption_keys events
|
# Legacy: scan timeline for io.element.call.encryption_keys events
|
||||||
|
|||||||
14
voice.py
14
voice.py
@@ -554,6 +554,7 @@ class VoiceSession:
|
|||||||
user_id = self.nio_client.user_id
|
user_id = self.nio_client.user_id
|
||||||
|
|
||||||
# MSC4143: check call.member state events first
|
# MSC4143: check call.member state events first
|
||||||
|
# Handles both top-level encryption_keys and memberships[].encryption_keys (Element X)
|
||||||
try:
|
try:
|
||||||
state_url = f"{homeserver}/_matrix/client/v3/rooms/{self.room_id}/state"
|
state_url = f"{homeserver}/_matrix/client/v3/rooms/{self.room_id}/state"
|
||||||
async with httpx.AsyncClient(timeout=10.0) as http:
|
async with httpx.AsyncClient(timeout=10.0) as http:
|
||||||
@@ -566,7 +567,17 @@ class VoiceSession:
|
|||||||
if sender == user_id:
|
if sender == user_id:
|
||||||
continue
|
continue
|
||||||
content = evt.get("content", {})
|
content = evt.get("content", {})
|
||||||
|
# Extract keys from both formats
|
||||||
enc_keys = content.get("encryption_keys", [])
|
enc_keys = content.get("encryption_keys", [])
|
||||||
|
# MSC4143 v2 / Element X: keys inside memberships array
|
||||||
|
if not enc_keys:
|
||||||
|
for m in content.get("memberships", []):
|
||||||
|
m_keys = m.get("encryption_keys", [])
|
||||||
|
if m_keys:
|
||||||
|
enc_keys = m_keys
|
||||||
|
# Use device_id from membership entry
|
||||||
|
content = {**content, "device_id": m.get("device_id", content.get("device_id", ""))}
|
||||||
|
break
|
||||||
if enc_keys:
|
if enc_keys:
|
||||||
device = content.get("device_id", "")
|
device = content.get("device_id", "")
|
||||||
import base64 as b64
|
import base64 as b64
|
||||||
@@ -581,7 +592,8 @@ class VoiceSession:
|
|||||||
self.on_encryption_key(sender, device, key_bytes, key_index)
|
self.on_encryption_key(sender, device, key_bytes, key_index)
|
||||||
max_idx = max(self._caller_all_keys.keys()) if self._caller_all_keys else key_index
|
max_idx = max(self._caller_all_keys.keys()) if self._caller_all_keys else key_index
|
||||||
latest = self._caller_all_keys.get(max_idx, key_bytes)
|
latest = self._caller_all_keys.get(max_idx, key_bytes)
|
||||||
logger.info("Got key from call.member state (sender=%s, index=%d)", sender, key_index)
|
logger.info("Got key from call.member state (sender=%s, device=%s, index=%d)",
|
||||||
|
sender, device, key_index)
|
||||||
return latest
|
return latest
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.debug("call.member state key fetch failed: %s", e)
|
logger.debug("call.member state key fetch failed: %s", e)
|
||||||
|
|||||||
Reference in New Issue
Block a user