Add comprehensive security improvements
- URL validation with domain whitelist
- Path validation to prevent directory traversal
- Resource limits (content size, scroll iterations)
- Content filtering and sanitization
- Non-root Docker execution with gosu
- Configurable output directory via CLI/env vars
- Fixed Docker volume permission issues

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
35
Dockerfile
35
Dockerfile
@@ -44,14 +44,41 @@ RUN pip install --upgrade pip && \
|
||||
feedgen \
|
||||
pytz
|
||||
|
||||
# Install Playwright browser binaries
|
||||
RUN playwright install
|
||||
# Install only Chromium (faster than all browsers)
|
||||
RUN playwright install chromium
|
||||
|
||||
# Create an entrypoint script to handle permissions (as root)
|
||||
RUN echo '#!/bin/bash\n\
|
||||
# Fix permissions for mounted volumes\n\
|
||||
if [ -d "/app/output" ]; then\n\
|
||||
chmod 777 /app/output 2>/dev/null || true\n\
|
||||
fi\n\
|
||||
# Run as scraper user\n\
|
||||
exec gosu scraper "$@"' > /entrypoint.sh && chmod +x /entrypoint.sh
|
||||
|
||||
# Install gosu for user switching
|
||||
# --no-install-recommends installs gosu alone, skipping optional recommended packages;
# the apt lists are removed in the same layer so they never persist in the image.
RUN apt-get update && apt-get install -y --no-install-recommends gosu && rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Create non-root user for security
|
||||
RUN useradd -m -u 1001 scraper && \
|
||||
mkdir -p /app/output && \
|
||||
chown -R scraper:scraper /app && \
|
||||
chmod 755 /app/output
|
||||
|
||||
# Copy the Python script to the container
|
||||
COPY main.py .
|
||||
RUN chown scraper:scraper main.py
|
||||
|
||||
# Set the environment variable to ensure Playwright works in the container
|
||||
ENV PLAYWRIGHT_BROWSERS_PATH=/root/.cache/ms-playwright
|
||||
ENV PLAYWRIGHT_BROWSERS_PATH=/home/scraper/.cache/ms-playwright
|
||||
|
||||
# Command to run the Python script
|
||||
# Don't switch user here - entrypoint will handle it
|
||||
# USER scraper
|
||||
|
||||
# Install Chromium for the scraper user (only what we need)
|
||||
USER scraper
|
||||
RUN playwright install chromium
|
||||
USER root
|
||||
|
||||
ENTRYPOINT ["/entrypoint.sh"]
|
||||
CMD ["python", "main.py"]
|
||||
|
Reference in New Issue
Block a user