# Page-header residue from the hosting site's file viewer (not Dockerfile content):
# uploaded by OscarGD6, commit b605dd4 ("Upload 3 files", verified), 2.17 kB.
# Base image: NVIDIA Triton Inference Server, release 25.01, "vllm-python-py3"
# variant (per the tag). The commented-out line is an alternative 25.05 tag with
# the "pyt" suffix.
# NOTE(review): presumably the PyTorch-backend image; confirm before switching.
#FROM nvcr.io/nvidia/tritonserver:25.05-pyt-python-py3
FROM nvcr.io/nvidia/tritonserver:25.01-vllm-python-py3
# ──────────────────────── SYSTEM SETUP ────────────────────────
USER root
# Install OS packages (git, pip) and the Python dependencies in one layer.
# - The apt package index is removed in the same layer that downloads it, so it
#   does not persist in the image.
# - Every pip call uses --no-cache-dir so pip's download/wheel cache is not baked
#   into the layer.
# - vllm is upgraded (-U), with the PyTorch cu128 wheel index as an extra source.
# NOTE(review): --no-install-recommends is intentionally NOT added; the pip
# installs below may depend on recommended packages (e.g. a compiler toolchain)
# being pulled in. Confirm before trimming.
RUN apt-get update -y && \
    apt-get install -y git python3-pip && \
    rm -rf /var/lib/apt/lists/* && \
    pip install --no-cache-dir gradio 'tritonclient[http]' && \
    pip install -U --no-cache-dir vllm --extra-index-url https://download.pytorch.org/whl/cu128 && \
    pip install --no-cache-dir flash-attn bitsandbytes
#pip install flash-attn flashinfer-python bitsandbytes
# ──────────────────────── HF / GIT IDENTITY ────────────────────────
# As root (set earlier in the file): create /home/triton-server (mode 755) and
# an empty .gitconfig inside it, both owned by triton-server.
# NOTE(review): presumably /home/triton-server is that user's $HOME, which is
# where `git config --global` writes; confirm against the base image.
RUN install --directory --mode=755 --owner=triton-server --group=triton-server /home/triton-server \
    && touch /home/triton-server/.gitconfig \
    && chown triton-server:triton-server /home/triton-server/.gitconfig
# Record the global git identity as the unprivileged user.
USER triton-server
RUN git config --global user.name "OscarGD6" \
    && git config --global user.email "[email protected]"
# ──────────────────────── HF CACHE MOUNT ────────────────────────
USER root
# Create the cache directory and give the whole /opt/triton_cache tree to
# triton-server so the unprivileged runtime user can write to it.
RUN mkdir -p /opt/triton_cache/hf && \
    chown -R triton-server:triton-server /opt/triton_cache
# HF_HOME and TRANSFORMERS_CACHE both point at the directory created above.
# NOTE(review): newer transformers releases deprecate TRANSFORMERS_CACHE in
# favour of HF_HOME; it is kept because dropping it would change where models
# are cached.
# VLLM_NO_USAGE_STATS=1 is vLLM's documented opt-out for usage-stats collection.
# VLLM_USAGE_LOGGING does not appear among vLLM's documented variables; it is
# kept only in case launch.sh or app.py reads it. Verify, then remove.
ENV HF_HOME=/opt/triton_cache/hf \
    TRANSFORMERS_CACHE=/opt/triton_cache/hf \
    VLLM_USAGE_LOGGING=disable \
    VLLM_NO_USAGE_STATS=1
# ──────────────────────── APP FILES ────────────────────────
# All application files live under /workspace, which is also the working
# directory for the instructions that follow.
WORKDIR /workspace
# Copy the model repository, the launcher script and app.py from the build
# context (destinations spelled out as absolute paths).
COPY model_repository /workspace/model_repository
COPY launch.sh /workspace/launch.sh
COPY app.py /workspace/app.py
# Make the launcher executable so it can be run directly.
RUN chmod +x /workspace/launch.sh
# Drop root: later instructions and the running container use triton-server.
USER triton-server
# ──────────────────────── PORTS ────────────────────────
# EXPOSE only documents the ports; publishing still happens at run time.
# NOTE(review): 8000/8001/8002 look like Triton's default HTTP/gRPC/metrics
# ports and 7860 like Gradio's default; confirm against launch.sh. The purpose
# of 8008 is not visible in this file.
EXPOSE 8000/tcp
EXPOSE 8001/tcp
EXPOSE 8002/tcp
EXPOSE 8008/tcp
EXPOSE 7860/tcp
# ──────────────────────── STARTUP ────────────────────────
# Exec-form default command: runs launch.sh directly, resolved relative to the
# working directory.
CMD ["./launch.sh"]