fix(security): constrain research article paths
Some checks are pending
CI / Python ${{ matrix.python-version }} test suite (3.11) (push) Waiting to run
CI / Python ${{ matrix.python-version }} test suite (3.12) (push) Waiting to run
Sync main to HF Space / mirror (push) Waiting to run

This commit is contained in:
Vincent Koc 2026-04-30 02:57:52 -07:00
parent e80902bafa
commit 01dd96c71c
No known key found for this signature in database
2 changed files with 39 additions and 6 deletions

View File

@ -5,13 +5,23 @@ from __future__ import annotations
import os
from http.server import BaseHTTPRequestHandler, HTTPServer
from pathlib import Path
from urllib.parse import unquote, urlsplit
# Directory named "articles" that sits beside this module.
ROOT = Path(__file__).parent / "articles"

# Slug -> file mapping, built once when the module is imported. Only regular
# files matching "*.html" directly inside ROOT are included; the slug is the
# file name without its extension.
ARTICLES = {
    html_file.stem: html_file
    for html_file in ROOT.glob("*.html")
    if html_file.is_file()
}
def article_for_request_path(request_path: str) -> Path | None:
    """Resolve a raw HTTP request target to a known article file.

    The query string and fragment are discarded and percent-escapes are
    decoded before matching. The remainder after ``/article/`` is used only
    as a key into ``ARTICLES``; it is never joined onto a filesystem path, so
    a request cannot select a file outside that mapping.

    Args:
        request_path: Request target as received, e.g. ``/article/foo?x=1``.

    Returns:
        The mapped article path, or ``None`` when the target is malformed,
        is not under ``/article/``, or names a slug missing from ``ARTICLES``.
    """
    try:
        decoded = unquote(urlsplit(request_path).path)
    except ValueError:
        # urlsplit rejects targets such as "//[" (unmatched bracket in the
        # netloc). Client-supplied garbage should mean "not found", not a
        # crash inside the request handler.
        return None
    if not decoded.startswith("/article/"):
        return None
    return ARTICLES.get(decoded.removeprefix("/article/"))
class Handler(BaseHTTPRequestHandler):
def do_GET(self) -> None: # noqa: N802
path = self.path.split("?")[0]
path = unquote(urlsplit(self.path).path)
if path == "/health":
self.send_response(200)
self.send_header("Content-Type", "application/json")
@ -22,9 +32,8 @@ class Handler(BaseHTTPRequestHandler):
self._index()
return
if path.startswith("/article/"):
slug = path.split("/", 2)[2]
article = ROOT / f"{slug}.html"
if article.exists():
article = article_for_request_path(self.path)
if article is not None:
self._html(article.read_bytes())
return
self.send_response(404)
@ -33,8 +42,7 @@ class Handler(BaseHTTPRequestHandler):
def _index(self) -> None:
items = []
for f in sorted(ROOT.glob("*.html")):
slug = f.stem
for slug in sorted(ARTICLES):
items.append(f'<li><a href="/article/{slug}">{slug}</a></li>')
body = (
"<!doctype html><html><body>"

View File

@ -0,0 +1,25 @@
from importlib import util
from pathlib import Path
def load_serve_module():
    """Import the task's ``serve.py`` directly from its file path.

    The script is located relative to this test file: starting from the
    directory one level above this file's own folder, then
    ``tasks-public/assets/t3_web_research_and_cite/serve.py``.

    Returns:
        The freshly executed module object, registered under the spec name
        ``t3_web_research_serve``.
    """
    base_dir = Path(__file__).resolve().parents[1]
    script = base_dir.joinpath(
        "tasks-public",
        "assets",
        "t3_web_research_and_cite",
        "serve.py",
    )
    module_spec = util.spec_from_file_location("t3_web_research_serve", script)
    loaded = util.module_from_spec(module_spec)
    # A spec without a loader cannot be executed; fail loudly in that case.
    assert module_spec.loader is not None
    module_spec.loader.exec_module(loaded)
    return loaded
def test_article_paths_resolve_only_known_article_slugs():
    """A known slug resolves to its HTML file; traversal attempts resolve to None."""
    resolve = load_serve_module().article_for_request_path

    known = resolve("/article/01_grid_basics")
    assert known.name == "01_grid_basics.html"

    # Both literal and percent-encoded "../" sequences must be rejected.
    for escaping_path in (
        "/article/../../serve.py",
        "/article/%2e%2e/%2e%2e/serve.py",
    ):
        assert resolve(escaping_path) is None