From 01dd96c71c27088387454ada482e029a1a4ac753 Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Thu, 30 Apr 2026 02:57:52 -0700 Subject: [PATCH] fix(security): constrain research article paths --- .../assets/t3_web_research_and_cite/serve.py | 20 ++++++++++----- tests/test_t3_web_research_serve.py | 25 +++++++++++++++++++ 2 files changed, 39 insertions(+), 6 deletions(-) create mode 100644 tests/test_t3_web_research_serve.py diff --git a/tasks-public/assets/t3_web_research_and_cite/serve.py b/tasks-public/assets/t3_web_research_and_cite/serve.py index 22f0d23..e106ddc 100644 --- a/tasks-public/assets/t3_web_research_and_cite/serve.py +++ b/tasks-public/assets/t3_web_research_and_cite/serve.py @@ -5,13 +5,23 @@ from __future__ import annotations import os from http.server import BaseHTTPRequestHandler, HTTPServer from pathlib import Path +from urllib.parse import unquote, urlsplit ROOT = Path(__file__).parent / "articles" +ARTICLES = {path.stem: path for path in ROOT.glob("*.html") if path.is_file()} + + +def article_for_request_path(request_path: str) -> Path | None: + path = unquote(urlsplit(request_path).path) + if not path.startswith("/article/"): + return None + slug = path.removeprefix("/article/") + return ARTICLES.get(slug) class Handler(BaseHTTPRequestHandler): def do_GET(self) -> None: # noqa: N802 - path = self.path.split("?")[0] + path = unquote(urlsplit(self.path).path) if path == "/health": self.send_response(200) self.send_header("Content-Type", "application/json") @@ -22,9 +32,8 @@ class Handler(BaseHTTPRequestHandler): self._index() return if path.startswith("/article/"): - slug = path.split("/", 2)[2] - article = ROOT / f"{slug}.html" - if article.exists(): + article = article_for_request_path(self.path) + if article is not None: self._html(article.read_bytes()) return self.send_response(404) @@ -33,8 +42,7 @@ class Handler(BaseHTTPRequestHandler): def _index(self) -> None: items = [] - for f in sorted(ROOT.glob("*.html")): - slug = f.stem + for 
slug in sorted(ARTICLES): items.append(f'<li><a href="/article/{slug}">{slug}</a></li>') body = ( "" diff --git a/tests/test_t3_web_research_serve.py b/tests/test_t3_web_research_serve.py new file mode 100644 index 0000000..2d0fb05 --- /dev/null +++ b/tests/test_t3_web_research_serve.py @@ -0,0 +1,25 @@ +from importlib import util +from pathlib import Path + + +def load_serve_module(): + serve_path = ( + Path(__file__).resolve().parents[1] + / "tasks-public" + / "assets" + / "t3_web_research_and_cite" + / "serve.py" + ) + spec = util.spec_from_file_location("t3_web_research_serve", serve_path) + module = util.module_from_spec(spec) + assert spec.loader is not None + spec.loader.exec_module(module) + return module + + +def test_article_paths_resolve_only_known_article_slugs(): + serve = load_serve_module() + + assert serve.article_for_request_path("/article/01_grid_basics").name == "01_grid_basics.html" + assert serve.article_for_request_path("/article/../../serve.py") is None + assert serve.article_for_request_path("/article/%2e%2e/%2e%2e/serve.py") is None