# Koyeb-optimized Dockerfile using the official vLLM OpenAI image
# Compatible with Koyeb's one-click deployment patterns for Qwen + vLLM
# Uses ENTRYPOINT to ensure args aren't overridden by Koyeb

FROM vllm/vllm-openai:latest

# Environment variables
ENV HF_HOME=/tmp/huggingface \
    VLLM_ATTENTION_BACKEND=FLASH_ATTN

# Create cache directories
RUN mkdir -p /tmp/huggingface && chmod 777 /tmp/huggingface

# Copy startup script
COPY start-vllm.sh /start-vllm.sh
RUN chmod +x /start-vllm.sh

# Expose vLLM default port
EXPOSE 8000

# Use ENTRYPOINT so it can't be overridden by empty Koyeb args
ENTRYPOINT ["/start-vllm.sh"]
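
# For reference, a minimal sketch of what the copied start-vllm.sh could contain.
# This is an assumption, not part of this repo: the MODEL_NAME variable and the
# default Qwen/Qwen2.5-7B-Instruct model are illustrative placeholders.
#
#   #!/bin/sh
#   # Launch vLLM's OpenAI-compatible server on the exposed port; the model can
#   # be overridden at deploy time through a Koyeb environment variable.
#   exec python3 -m vllm.entrypoints.openai.api_server \
#     --model "${MODEL_NAME:-Qwen/Qwen2.5-7B-Instruct}" \
#     --host 0.0.0.0 \
#     --port 8000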