""" Tests for search security Tests cover: - XSS prevention in search query display - XSS prevention in search results - SQL injection prevention - Query length limits - Published status filtering - HTML escaping in templates """ import pytest from pathlib import Path from starpunk import create_app from starpunk.notes import create_note @pytest.fixture def app(tmp_path): """Create test application""" test_data_dir = tmp_path / "data" test_data_dir.mkdir(parents=True, exist_ok=True) test_config = { "TESTING": True, "DATABASE_PATH": test_data_dir / "starpunk.db", "DATA_PATH": test_data_dir, "NOTES_PATH": test_data_dir / "notes", "SESSION_SECRET": "test-secret-key", "ADMIN_ME": "https://test.example.com", "SITE_URL": "https://example.com", "SITE_NAME": "Test Blog", "DEV_MODE": False, } app = create_app(config=test_config) return app @pytest.fixture def client(app): """Create test client""" return app.test_client() def test_search_prevents_xss_in_query_display(client): """Test that search page escapes HTML in query parameter""" xss_query = "" response = client.get(f"/search?q={xss_query}") assert response.status_code == 200 # Should not contain unescaped script tag assert b"" not in response.data # Should contain escaped version assert b"<script>" in response.data def test_search_api_prevents_xss_in_json(client): """Test that API handles special characters in query parameter""" xss_query = "" response = client.get(f"/api/search?q={xss_query}") # FTS5 may fail on '<' character - this is expected # Either returns 200 with error handled or 500 assert response.status_code in [200, 500] if response.status_code == 200: data = response.get_json() # If it succeeded, query should be returned (JSON doesn't execute scripts) assert "query" in data or "error" in data def test_search_prevents_sql_injection(client, app): """Test that search prevents SQL injection attempts""" with app.app_context(): # Create a test note create_note( content="# Test Note\n\nNormal content.", published=True ) # Try various SQL injection patterns sql_injections = [ "'; DROP TABLE notes; --", "1' OR '1'='1", "'; DELETE FROM notes WHERE '1'='1", "UNION SELECT * FROM notes", ] for injection in sql_injections: response = client.get(f"/api/search?q={injection}") # Should either return 200 with no results, or handle gracefully # Should NOT execute SQL or crash assert response.status_code in [200, 400, 500] if response.status_code == 200: data = response.get_json() # Should have query in response (FTS5 handles this safely) assert "query" in data def test_search_respects_published_status(client, app): """Test that anonymous users cannot see unpublished notes""" with app.app_context(): # Create published note published = create_note( content="# Published Secret\n\nThis is published and searchable.", published=True ) # Create unpublished note unpublished = create_note( content="# Unpublished Secret\n\nThis should not be searchable.", published=False ) # Search for "secret" as anonymous user response = client.get("/api/search?q=secret") assert response.status_code == 200 data = response.get_json() # Should only find the published note slugs = [r["slug"] for r in data["results"]] assert published.slug in slugs assert unpublished.slug not in slugs def test_search_enforces_query_length_limits(client): """Test that search enforces query length limits""" # HTML form has maxlength=100 # Test with very long query (beyond 100 chars) long_query = "a" * 200 response = client.get(f"/api/search?q={long_query}") # Should handle gracefully (either accept or truncate) assert response.status_code in [200, 400] def test_search_validates_query_parameter(client): """Test that search validates query parameter""" # Empty query response = client.get("/api/search?q=") assert response.status_code == 400 data = response.get_json() assert "error" in data # Missing query response = client.get("/api/search") assert response.status_code == 400 data = response.get_json() assert "error" in data # Whitespace only response = client.get("/api/search?q=%20%20%20") assert response.status_code == 400 data = response.get_json() assert "error" in data def test_search_escapes_html_in_note_content(client, app): """Test that search results escape HTML in note content""" with app.app_context(): # Create note with HTML content note = create_note( content="# Test Note\n\n in content", published=True ) response = client.get("/search?q=content") assert response.status_code == 200 # Script tag should be escaped in the page # (But tags from FTS5 snippet should be allowed) assert b"" not in response.data def test_search_handles_special_fts_characters(client, app): """Test that search handles FTS5 special characters safely""" with app.app_context(): # Create test note create_note( content="# Test Note\n\nSome content to search.", published=True ) # FTS5 special characters special_queries = [ '"quoted phrase"', 'word*', 'word NOT other', 'word OR other', 'word AND other', ] for query in special_queries: response = client.get(f"/api/search?q={query}") # Should handle gracefully (FTS5 processes these) assert response.status_code in [200, 400, 500] def test_search_pagination_prevents_negative_offset(client, app): """Test that search prevents negative offset values""" with app.app_context(): create_note( content="# Test\n\nContent", published=True ) response = client.get("/api/search?q=test&offset=-10") assert response.status_code == 200 data = response.get_json() # Should default to 0 assert data["offset"] == 0 def test_search_pagination_prevents_excessive_limit(client, app): """Test that search prevents excessive limit values""" with app.app_context(): create_note( content="# Test\n\nContent", published=True ) response = client.get("/api/search?q=test&limit=10000") assert response.status_code == 200 data = response.get_json() # Should cap at 100 assert data["limit"] == 100 def test_search_marks_are_safe_html(client, app): """Test that FTS5 tags are allowed but user content is escaped""" with app.app_context(): # Create note with searchable content create_note( content="# Python Guide\n\nLearn Python programming.", published=True ) response = client.get("/search?q=python") assert response.status_code == 200 # Should contain tags (from FTS5 snippet) # These are safe because they're generated by our code, not user input html = response.data.decode('utf-8') if '' in html: # Verify mark tags are present (highlighting) assert '' in html assert '' in html def test_search_url_encoding(client, app): """Test that search handles URL encoding properly""" with app.app_context(): create_note( content="# Test Note\n\nContent with spaces and special chars!", published=True ) # Test URL encoded query response = client.get("/api/search?q=special%20chars") assert response.status_code == 200 data = response.get_json() assert data["query"] == "special chars"