- Enhanced Dockerfile with security improvements and cleaner dependency management - Fixed requirements.txt to use correct package names - Updated gitignore to properly exclude output directory and contents 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com>
		
			
				
	
	
		
			81 lines
		
	
	
		
			1.8 KiB
		
	
	
	
		
			Docker
		
	
	
	
	
	
			
		
		
	
	
			81 lines
		
	
	
		
			1.8 KiB
		
	
	
	
		
			Docker
		
	
	
	
	
	
# Base image: official Python 3.12.7, slim variant, built on Debian bullseye.
FROM python:3.12.7-slim-bullseye

# Relative paths in the instructions that follow (COPY destinations, the
# default command) resolve under /app.
WORKDIR /app
 | 
						|
 | 
						|
# Install OS packages: general tooling (bash, build-essential, libffi-dev, git,
# curl, wget, gnupg, ca-certificates), gosu for the entrypoint's privilege
# drop, and the shared libraries needed by the Playwright-managed browser.
#
# --no-install-recommends keeps optional "recommended" packages out of the
# image; the apt package lists are deleted in the same layer so they are never
# stored in the image.
#
# NOTE(review): libayatana-appindicator3-1 replaces libappindicator3-1, which
# is believed to be absent from the Debian bullseye archive (apt would fail
# with "no installation candidate") - confirm against the bullseye package index.
RUN apt-get update && apt-get install -y --no-install-recommends \
    bash \
    build-essential \
    ca-certificates \
    curl \
    git \
    gnupg \
    gosu \
    libasound2 \
    libatk-bridge2.0-0 \
    libatk1.0-0 \
    libayatana-appindicator3-1 \
    libdrm2 \
    libffi-dev \
    libgbm1 \
    libgtk-3-0 \
    libnss3 \
    libpango-1.0-0 \
    libpangocairo-1.0-0 \
    libx11-6 \
    libx11-xcb1 \
    libxcomposite1 \
    libxcursor1 \
    libxdamage1 \
    libxi6 \
    libxrandr2 \
    libxshmfence1 \
    libxtst6 \
    wget \
    xdg-utils \
    && rm -rf /var/lib/apt/lists/*
 | 
						|
 | 
						|
# Copy only the dependency manifest before installing, so this layer is
# rebuilt only when requirements.txt itself changes.
COPY requirements.txt .

# Upgrade pip, then install the pinned requirements. --no-cache-dir stops pip
# from writing its download/wheel cache into the image layer.
RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir -r requirements.txt
 | 
						|
 | 
						|
 | 
						|
# Generate /entrypoint.sh (as root) and mark it executable.
#
# The script is written with printf '%s\n', one argument per script line, so
# the result does not depend on whether the build shell's `echo` expands "\n".
# Each script line is a quoted argument; a continuation line that began with a
# bare '#' would be dropped by the Dockerfile parser as a comment.
#
# At container start the script:
#   1. makes /app/output world-writable if it exists (best effort; errors are
#      ignored), so a mounted volume is writable by the unprivileged user;
#   2. replaces itself, via exec + gosu, with the requested command running as
#      the "scraper" user.
RUN printf '%s\n' \
        '#!/bin/bash' \
        '# Fix permissions for mounted volumes' \
        'if [ -d "/app/output" ]; then' \
        '    chmod 777 /app/output 2>/dev/null || true' \
        'fi' \
        '# Run as scraper user' \
        'exec gosu scraper "$@"' \
        > /entrypoint.sh \
    && chmod +x /entrypoint.sh
 | 
						|
 | 
						|
 | 
						|
# Add the unprivileged "scraper" account (UID 1001, with a home directory),
# ensure /app/output exists, give the whole /app tree to that account, and set
# /app/output to mode 755.
RUN useradd --create-home --uid 1001 scraper \
    && mkdir --parents /app/output \
    && chown --recursive scraper:scraper /app \
    && chmod 755 /app/output
 | 
						|
 | 
						|
# Copy the scraper script into the working directory, owned by the scraper
# user. COPY --chown sets ownership in the copy layer itself, so no separate
# `RUN chown` layer is needed.
COPY --chown=scraper:scraper main.py .
 | 
						|
 | 
						|
# Environment for build steps below and for the running container:
#   PLAYWRIGHT_BROWSERS_PATH - directory under the scraper user's home where
#                              Playwright stores and looks up browser binaries
#   PYTHONDONTWRITEBYTECODE  - tell Python not to write .pyc files
#   PYTHONUNBUFFERED         - keep Python's stdout/stderr unbuffered
ENV PLAYWRIGHT_BROWSERS_PATH=/home/scraper/.cache/ms-playwright \
    PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1
 | 
						|
 | 
						|
# Install Chromium while running as the scraper user, so the downloaded
# browser files belong to that account.
USER scraper
RUN playwright install chromium

# Switch back to root: the container must start as root so /entrypoint.sh can
# adjust /app/output permissions before it hands off to the scraper user with
# gosu. The image therefore intentionally does not end on `USER scraper`.
USER root

# /entrypoint.sh receives the CMD below as its arguments.
ENTRYPOINT ["/entrypoint.sh"]
CMD ["python", "main.py"]
 |