# Image built on NVIDIA's Triton Inference Server `vllm-python` base. It adds
# Gradio, the Triton HTTP client, a newer vLLM, flash-attn and bitsandbytes,
# then copies in a model repository, an app script and a launcher script that
# is run as the unprivileged `triton-server` user.
#
# Alternative base image kept for reference:
#FROM nvcr.io/nvidia/tritonserver:25.05-pyt-python-py3
FROM nvcr.io/nvidia/tritonserver:25.01-vllm-python-py3

# ------------------------ SYSTEM SETUP ------------------------
# Package installation requires root.
USER root

# OS packages. `update` and `install` share one layer so a stale cached package
# index is never reused, and the index is removed in that same layer so it does
# not end up in the image.
# NOTE(review): --no-install-recommends is intentionally not set; the
# recommended packages may provide tooling the pip installs below rely on --
# confirm before trimming.
RUN apt-get update && \
    apt-get install -y \
        git \
        python3-pip && \
    rm -rf /var/lib/apt/lists/*

# Python packages. --no-cache-dir keeps pip's download cache out of the layer.
# vLLM is upgraded in place, with the PyTorch cu128 wheel index as an extra
# package source.
# TODO(review): none of these packages are version-pinned, so rebuilds are not
# reproducible.
RUN pip install --no-cache-dir gradio 'tritonclient[http]' && \
    pip install -U --no-cache-dir vllm --extra-index-url https://download.pytorch.org/whl/cu128 && \
    pip install --no-cache-dir flash-attn bitsandbytes
#pip install flash-attn flashinfer-python bitsandbytes

# ------------------------ HF / GIT IDENTITY ------------------------
# Create a home directory and an empty .gitconfig owned by triton-server so the
# `git config --global` calls below (run as that user) can write to it.
RUN install -d -m 755 -o triton-server -g triton-server /home/triton-server && \
    touch /home/triton-server/.gitconfig && \
    chown triton-server:triton-server /home/triton-server/.gitconfig

# Git identity baked into the image. Override at build time with
# --build-arg GIT_USER_EMAIL=... --build-arg GIT_USER_NAME=...
ARG GIT_USER_EMAIL="[email protected]"
ARG GIT_USER_NAME="OscarGD6"

USER triton-server
RUN git config --global user.email "${GIT_USER_EMAIL}" && \
    git config --global user.name "${GIT_USER_NAME}"

# ------------------------ HF CACHE MOUNT ------------------------
# Back to root to create the cache directory, then hand it to triton-server.
USER root
RUN mkdir -p /opt/triton_cache/hf && \
    chown -R triton-server:triton-server /opt/triton_cache

# HF_HOME points the Hugging Face cache at the directory created above.
# TRANSFORMERS_CACHE is kept for older transformers releases.
# NOTE(review): newer transformers releases report it as deprecated in favour
# of HF_HOME -- confirm and drop once no longer needed.
# VLLM_NO_USAGE_STATS=1 is the opt-out switch documented by vLLM;
# VLLM_USAGE_LOGGING is retained unchanged in case launch.sh or app.py reads it.
ENV HF_HOME=/opt/triton_cache/hf \
    TRANSFORMERS_CACHE=/opt/triton_cache/hf \
    VLLM_USAGE_LOGGING=disable \
    VLLM_NO_USAGE_STATS=1

# ------------------------ APP FILES ------------------------
WORKDIR /workspace
COPY model_repository ./model_repository
COPY launch.sh app.py ./
RUN chmod +x ./launch.sh

# Drop privileges for runtime.
USER triton-server

# ------------------------ PORTS ------------------------
# EXPOSE is documentation only; publish ports with `docker run -p`.
# NOTE(review): 8000/8001/8002 match Triton's default HTTP/gRPC/metrics ports
# and 7860 matches Gradio's default; the purpose of 8008 is not visible here --
# confirm against launch.sh.
EXPOSE 8000 8001 8002 8008 7860

# ------------------------ STARTUP ------------------------
# Exec form: launch.sh is resolved relative to WORKDIR (/workspace) and run
# directly, without a wrapping shell.
CMD ["./launch.sh"]