# Image for a Playwright-based Python scraper (main.py).
#
# Build-time layout, ordered from least to most frequently changing so that
# editing main.py never invalidates the package or browser layers:
#   1. OS packages (Chromium runtime libraries + gosu)
#   2. Python packages
#   3. Unprivileged "scraper" user and the /app/output directory
#   4. Chromium download into the scraper user's cache
#   5. Entrypoint script
#   6. Application script
#
# At runtime the container starts as root so the entrypoint can adjust the
# permissions of /app/output, then drops to the "scraper" user via gosu before
# running the command.

# Use the official Python 3.12.7 slim image (Debian bullseye) as the base
FROM python:3.12.7-slim-bullseye

# Set the working directory
WORKDIR /app

# Install the system libraries needed by Playwright's Chromium, plus gosu,
# which the entrypoint uses to drop privileges. Everything is installed in a
# single layer and the apt lists are removed in that same layer.
#
# libappindicator3-1 is not packaged for Debian bullseye; its successor
# libayatana-appindicator3-1 is installed instead so the build does not fail
# with "no installation candidate".
#
# NOTE(review): --no-install-recommends is deliberately not added; confirm
# that Chromium does not rely on any recommended package (e.g. fonts) before
# enabling it.
RUN apt-get update && apt-get install -y \
        bash \
        build-essential \
        ca-certificates \
        curl \
        git \
        gnupg \
        gosu \
        libasound2 \
        libatk-bridge2.0-0 \
        libatk1.0-0 \
        libayatana-appindicator3-1 \
        libdrm2 \
        libffi-dev \
        libgbm1 \
        libgtk-3-0 \
        libnss3 \
        libpango-1.0-0 \
        libpangocairo-1.0-0 \
        libx11-6 \
        libx11-xcb1 \
        libxcomposite1 \
        libxcursor1 \
        libxdamage1 \
        libxi6 \
        libxrandr2 \
        libxshmfence1 \
        libxtst6 \
        wget \
        xdg-utils \
    && rm -rf /var/lib/apt/lists/*

# Install Playwright and the other Python dependencies.
# --no-cache-dir keeps pip's download cache out of the image layer.
# NOTE(review): versions are unpinned, so rebuilds may pick up newer releases;
# consider pinning once known-good versions are chosen.
RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir \
        beautifulsoup4 \
        feedgen \
        playwright \
        pytz

# Create the non-root user that runs the scraper, and give it ownership of
# the working directory and the output directory.
RUN useradd -m -u 1001 scraper && \
    mkdir -p /app/output && \
    chown -R scraper:scraper /app && \
    chmod 755 /app/output

# Tell Playwright where its browsers live. This must be set before the
# browser download below so the build-time install and the runtime lookup
# use the same directory.
ENV PLAYWRIGHT_BROWSERS_PATH=/home/scraper/.cache/ms-playwright

# Download only Chromium, and do it as the scraper user so the files land in
# (and are owned by) that user's cache directory. Installing a second copy as
# root would only add an unused browser download to the image.
USER scraper
RUN playwright install chromium

# Switch back to root: the remaining build steps write to / and the
# entrypoint needs root at start-up to fix volume permissions. The entrypoint
# itself drops to the scraper user, so no final "USER scraper" is set here.
USER root

# Generate the entrypoint script. printf emits one argument per line, which
# avoids depending on the shell's echo interpreting "\n" and keeps the
# script's own comment lines (each argument starts with a quote, so the
# Dockerfile parser does not treat it as a Dockerfile comment).
#
# NOTE(review): the script makes /app/output world-writable so that a
# bind-mounted host directory is writable by UID 1001; errors are ignored on
# purpose (best effort). Tighten this if the mount's ownership can be
# controlled instead.
RUN printf '%s\n' \
        '#!/bin/bash' \
        '# Fix permissions for mounted volumes' \
        'if [ -d "/app/output" ]; then' \
        '    chmod 777 /app/output 2>/dev/null || true' \
        'fi' \
        '# Run as scraper user' \
        'exec gosu scraper "$@"' \
        > /entrypoint.sh && \
    chmod +x /entrypoint.sh

# Copy the Python script last so that editing it only rebuilds this layer.
# --chown sets ownership during the copy instead of in a follow-up RUN.
COPY --chown=scraper:scraper main.py .

ENTRYPOINT ["/entrypoint.sh"]
CMD ["python", "main.py"]