diff --git a/bot.py b/bot.py
index 69569f0..85438f2 100644
--- a/bot.py
+++ b/bot.py
@@ -9,6 +9,7 @@ import re
 import time
 import uuid
 
+import docx
 import fitz  # pymupdf
 import httpx
 from openai import AsyncOpenAI
@@ -281,7 +282,7 @@ class Bot:
         self._pending_connects: dict[str, str] = {}  # matrix_user_id -> device_code
         self._pending_translate: dict[str, dict] = {}  # sender -> {text, detected_lang, room_id}
         self._pending_reply: dict[str, dict] = {}  # sender -> {target_lang}
-        self._room_pdf_context: dict[str, dict] = {}  # room_id -> {filename, text, timestamp}
+        self._room_document_context: dict[str, list[dict]] = {}  # room_id -> [{type, filename, text, timestamp}, ...]
 
     @staticmethod
     def _load_user_keys() -> dict[str, str]:
@@ -463,13 +464,19 @@ class Bot:
                     import secrets
                     bot_key = secrets.token_bytes(16)
 
-                    # Get PDF context if recently uploaded (within 1 hour)
-                    pdf_ctx = self._room_pdf_context.get(room_id, {})
-                    pdf_text = None
-                    if pdf_ctx and time.time() - pdf_ctx.get("timestamp", 0) < 3600:
-                        pdf_text = pdf_ctx.get("text")
-                        logger.info("Passing PDF context to voice session: %s (%d chars)",
-                                    pdf_ctx.get("filename", "?"), len(pdf_text) if pdf_text else 0)
+                    # Collect all recent document contexts (< 1 hour)
+                    doc_entries = [e for e in self._room_document_context.get(room_id, [])
+                                   if time.time() - e["timestamp"] < 3600]
+                    document_context = None
+                    if doc_entries:
+                        parts = []
+                        for e in doc_entries:
+                            label = {"pdf": "PDF", "image": "Bild", "text": "Datei"}.get(e["type"], "Dokument")
+                            text = e["text"][:10000] if e["type"] != "image" else e["text"][:2000]
+                            parts.append(f"[{label}: {e['filename']}]\n{text}")
+                        document_context = "\n\n".join(parts)
+                        logger.info("Passing %d document context(s) to voice session (%d chars total)",
+                                    len(doc_entries), len(document_context))
 
                     vs = VoiceSession(
                         nio_client=self.client,
@@ -482,7 +489,7 @@ class Bot:
                             self._publish_encryption_key(rid, key)),
                         memory=self.memory,
                         caller_user_id=event.sender,
-                        document_context=pdf_text,
+                        document_context=document_context,
                     )
 
                     # Check timeline for caller's key
@@ -858,7 +865,13 @@ class Bot:
 
         await self.client.room_typing(room.room_id, typing_state=True)
         try:
-            await self._respond_with_ai(room, text, sender=event.sender, image_data=(b64_data, mime_type))
+            reply = await self._respond_with_ai(room, text, sender=event.sender, image_data=(b64_data, mime_type))
+            if reply:
+                docs = self._room_document_context.setdefault(room.room_id, [])
+                docs.append({"type": "image", "filename": caption or "image",
+                             "text": reply, "timestamp": time.time()})
+                if len(docs) > 5:
+                    del docs[:-5]
         finally:
             await self.client.room_typing(room.room_id, typing_state=False)
 
@@ -916,12 +929,23 @@ class Bot:
 
         await self.client.room_typing(room.room_id, typing_state=True)
         try:
-            await self._respond_with_ai(room, text, sender=event.sender, image_data=(b64_data, mime_type))
+            reply = await self._respond_with_ai(room, text, sender=event.sender, image_data=(b64_data, mime_type))
+            if reply:
+                docs = self._room_document_context.setdefault(room.room_id, [])
+                docs.append({"type": "image", "filename": caption or "image",
+                             "text": reply, "timestamp": time.time()})
+                if len(docs) > 5:
+                    del docs[:-5]
         finally:
             await self.client.room_typing(room.room_id, typing_state=False)
 
+    # Lowercased filename extensions that on_file_message decodes as plain text
+    _TEXT_EXTENSIONS = frozenset((
+        ".csv", ".html", ".json", ".log", ".md", ".txt", ".xml", ".yaml", ".yml",
+    ))
+
     async def on_file_message(self, room, event: RoomMessageFile):
-        """Handle file messages: extract text from PDFs and send to AI."""
+        """Handle file messages: extract text from PDFs, docx, and text files."""
         if event.sender == BOT_USER:
             return
         if not self._sync_token_received:
@@ -930,14 +954,19 @@ class Bot:
         if time.time() - server_ts > 30:
             return
 
-        # Only handle PDFs
         source = event.source or {}
         content = source.get("content", {})
         info = content.get("info", {})
         mime_type = info.get("mimetype", "")
         filename = content.get("body", "file")
+        ext = os.path.splitext(filename.lower())[1]
 
-        if mime_type != "application/pdf" and not filename.lower().endswith(".pdf"):
+        # Determine file type
+        is_pdf = mime_type == "application/pdf" or ext == ".pdf"
+        is_docx = mime_type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document" or ext == ".docx"
+        is_text = ext in self._TEXT_EXTENSIONS or mime_type.startswith("text/")
+
+        if not (is_pdf or is_docx or is_text):
             return
 
         await self._load_room_settings(room.room_id)
@@ -959,7 +988,7 @@ class Bot:
             await self._send_text(room.room_id, "LLM not configured (LITELLM_BASE_URL not set).")
             return
 
-        # Download PDF
+        # Download file
         mxc_url = event.url
         if not mxc_url:
             return
@@ -968,29 +997,43 @@ class Bot:
             if not hasattr(resp, "body"):
                 logger.warning("File download failed for %s", mxc_url)
                 return
-            pdf_bytes = resp.body
+            file_bytes = resp.body
         except Exception:
             logger.exception("Failed to download file %s", mxc_url)
             return
 
-        # Extract text from PDF
-        pdf_text = self._extract_pdf_text(pdf_bytes)
-        if not pdf_text:
-            await self._send_text(room.room_id, "I couldn't extract any text from that PDF.")
+        # Extract text based on file type
+        if is_pdf:
+            extracted = self._extract_pdf_text(file_bytes)
+            doc_type = "pdf"
+        elif is_docx:
+            extracted = self._extract_docx_text(file_bytes)
+            doc_type = "text"
+        else:
+            extracted = self._extract_text_file(file_bytes)
+            doc_type = "text"
+
+        if not extracted:
+            await self._send_text(room.room_id, f"I couldn't extract any text from that file ({filename}).")
             return
 
         # Truncate to avoid token limits (roughly 50k chars ≈ 12k tokens)
-        if len(pdf_text) > 50000:
-            pdf_text = pdf_text[:50000] + "\n\n[... truncated, PDF too long ...]"
+        if len(extracted) > 50000:
+            extracted = extracted[:50000] + "\n\n[... truncated, file too long ...]"
 
-        # Store PDF context for voice session pickup
-        self._room_pdf_context[room.room_id] = {
+        # Store document context for voice session pickup
+        docs = self._room_document_context.setdefault(room.room_id, [])
+        docs.append({
+            "type": doc_type,
             "filename": filename,
-            "text": pdf_text,
+            "text": extracted,
             "timestamp": time.time(),
-        }
+        })
+        if len(docs) > 5:
+            del docs[:-5]
 
-        user_message = f'The user sent a PDF file named "{filename}". Here is the extracted text:\n\n{pdf_text}\n\nPlease summarize or answer questions about this document.'
+        label = "PDF" if is_pdf else "Word document" if is_docx else "file"
+        user_message = f'The user sent a {label} named "{filename}". Here is the extracted text:\n\n{extracted}\n\nPlease summarize or answer questions about this document.'
 
         await self.client.room_typing(room.room_id, typing_state=True)
         try:
@@ -1014,6 +1057,28 @@ class Bot:
             logger.exception("PDF text extraction failed")
             return ""
 
+    @staticmethod
+    def _extract_docx_text(docx_bytes: bytes) -> str:
+        """Join the non-blank paragraph texts of a .docx with newlines; empty string on failure."""
+        try:
+            paragraphs = docx.Document(io.BytesIO(docx_bytes)).paragraphs
+            return "\n".join(text for text in (p.text for p in paragraphs) if text.strip())
+        except Exception:
+            logger.exception("DOCX text extraction failed")
+            return ""
+
+    @staticmethod
+    def _extract_text_file(file_bytes: bytes) -> str:
+        """Decode text file bytes as UTF-8, falling back to latin-1.
+
+        A leading UTF-8 BOM is dropped. latin-1 maps every byte to a code
+        point, so the fallback cannot fail and needs no error handling.
+        """
+        try:
+            return file_bytes.decode("utf-8-sig")
+        except UnicodeDecodeError:
+            return file_bytes.decode("latin-1")
+
     async def _handle_command(self, room, cmd: str, event=None):
         if cmd == "help":
             await self._send_text(room.room_id, HELP_TEXT)
@@ -1239,7 +1304,8 @@ class Bot:
         finally:
             self._pending_connects.pop(sender, None)
 
-    async def _respond_with_ai(self, room, user_message: str, sender: str = None, image_data: tuple = None):
+    async def _respond_with_ai(self, room, user_message: str, sender: str = None, image_data: tuple = None) -> str | None:
+        """Send AI response and return the reply text (or None on failure)."""
         model = self.room_models.get(room.room_id, DEFAULT_MODEL)
 
         # Fetch conversation history FIRST (needed for query rewriting)
@@ -1333,9 +1399,12 @@ class Bot:
                 gap_seconds = time.time() - last_rename if last_rename else float("inf")
                 if gap_seconds > 300:
                     await self._auto_rename_room(room, user_message, reply)
+
+            return reply
         except Exception:
             logger.exception("LLM call failed")
             await self._send_text(room.room_id, "Sorry, I couldn't generate a response.")
+            return None
 
     async def _rewrite_query(self, user_message: str, history: list[dict], model: str) -> str:
         """Rewrite user message into a standalone search query using conversation context."""
diff --git a/requirements.txt b/requirements.txt
index c42f1b1..f46830b 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -9,3 +9,4 @@ canonicaljson>=2.0,<3.0
 httpx>=0.27,<1.0
 openai>=2.0,<3.0
 pymupdf>=1.24,<2.0
+python-docx>=1.0,<2.0