Improve Docker configuration and gitignore
- Enhanced Dockerfile with security improvements and cleaner dependency management
- Fixed requirements.txt to use the correct package names
- Updated .gitignore to properly exclude the output directory and its contents

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
		
							
								
								
									
										1
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										1
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							@@ -1,3 +1,4 @@
 | 
			
		||||
*.xml
 | 
			
		||||
.python-version
 | 
			
		||||
output/
 | 
			
		||||
output/*
 | 
			
		||||
							
								
								
									
										24
									
								
								Dockerfile
									
									
									
									
									
								
							
							
						
						
									
										24
									
								
								Dockerfile
									
									
									
									
									
								
							@@ -4,7 +4,7 @@ FROM python:3.12.7-slim-bullseye
 | 
			
		||||
# Set the working directory
 | 
			
		||||
WORKDIR /app
 | 
			
		||||
 | 
			
		||||
# Install system dependencies needed for Playwright and its browsers
 | 
			
		||||
# Install system dependencies needed for Playwright and gosu
 | 
			
		||||
RUN apt-get update && apt-get install -y \
 | 
			
		||||
    bash \
 | 
			
		||||
    build-essential \
 | 
			
		||||
@@ -14,6 +14,7 @@ RUN apt-get update && apt-get install -y \
 | 
			
		||||
    ca-certificates \
 | 
			
		||||
    wget \
 | 
			
		||||
    gnupg \
 | 
			
		||||
    gosu \
 | 
			
		||||
    libnss3 \
 | 
			
		||||
    libatk-bridge2.0-0 \
 | 
			
		||||
    libx11-xcb1 \
 | 
			
		||||
@@ -36,16 +37,11 @@ RUN apt-get update && apt-get install -y \
 | 
			
		||||
    libdrm2 \
 | 
			
		||||
    && rm -rf /var/lib/apt/lists/*
 | 
			
		||||
 | 
			
		||||
# Install Playwright and required Python dependencies
 | 
			
		||||
# Copy requirements and install Python dependencies
 | 
			
		||||
COPY requirements.txt .
 | 
			
		||||
RUN pip install --upgrade pip && \
 | 
			
		||||
    pip install \
 | 
			
		||||
    playwright \
 | 
			
		||||
    beautifulsoup4 \
 | 
			
		||||
    feedgen \
 | 
			
		||||
    pytz
 | 
			
		||||
    pip install -r requirements.txt
 | 
			
		||||
 | 
			
		||||
# Install only Chromium (faster than all browsers)
 | 
			
		||||
RUN playwright install chromium
 | 
			
		||||
 | 
			
		||||
# Create an entrypoint script to handle permissions (as root)
 | 
			
		||||
RUN echo '#!/bin/bash\n\
 | 
			
		||||
@@ -56,8 +52,6 @@ fi\n\
 | 
			
		||||
# Run as scraper user\n\
 | 
			
		||||
exec gosu scraper "$@"' > /entrypoint.sh && chmod +x /entrypoint.sh
 | 
			
		||||
 | 
			
		||||
# Install gosu for user switching
 | 
			
		||||
RUN apt-get update && apt-get install -y gosu && rm -rf /var/lib/apt/lists/*
 | 
			
		||||
 | 
			
		||||
# Create non-root user for security
 | 
			
		||||
RUN useradd -m -u 1001 scraper && \
 | 
			
		||||
@@ -69,13 +63,15 @@ RUN useradd -m -u 1001 scraper && \
 | 
			
		||||
COPY main.py .
 | 
			
		||||
RUN chown scraper:scraper main.py
 | 
			
		||||
 | 
			
		||||
# Set the environment variable to ensure Playwright works in the container
 | 
			
		||||
ENV PLAYWRIGHT_BROWSERS_PATH=/home/scraper/.cache/ms-playwright
 | 
			
		||||
# Set environment variables
 | 
			
		||||
ENV PYTHONUNBUFFERED=1 \
 | 
			
		||||
    PYTHONDONTWRITEBYTECODE=1 \
 | 
			
		||||
    PLAYWRIGHT_BROWSERS_PATH=/home/scraper/.cache/ms-playwright
 | 
			
		||||
 | 
			
		||||
# Don't switch user here - entrypoint will handle it
 | 
			
		||||
# USER scraper
 | 
			
		||||
 | 
			
		||||
# Install Chromium for the scraper user (only what we need)
 | 
			
		||||
# Install Chromium for the scraper user
 | 
			
		||||
USER scraper
 | 
			
		||||
RUN playwright install chromium
 | 
			
		||||
USER root
 | 
			
		||||
 
 | 
			
		||||
@@ -1,5 +1,5 @@
 | 
			
		||||
requests
 | 
			
		||||
bs4
 | 
			
		||||
beautifulsoup4
 | 
			
		||||
feedgen
 | 
			
		||||
playwright
 | 
			
		||||
pytz
 | 
			
		||||
		Reference in New Issue
	
	Block a user