Spaces:

OscarGD6
/

nutrition-table-detector-triton-vllm-backend

Paused

App Files Files Community

nutrition-table-detector-triton-vllm-backend / Dockerfile

OscarGD6's picture

Upload 3 files

b605dd4 verified 5 months ago

2.17 kB

	# Base image: NVIDIA Triton Inference Server from NGC, pinned to the 25.01
	# release's "vllm-python-py3" tag.
	# Alternative base kept for reference (25.05 release, "pyt-python-py3" tag):
	#FROM nvcr.io/nvidia/tritonserver:25.05-pyt-python-py3
	FROM nvcr.io/nvidia/tritonserver:25.01-vllm-python-py3

	# ──────────────────────── SYSTEM SETUP ────────────────────────
	# Package installation requires root.
	USER root

	# OS packages. The apt package index is removed in the same layer that created
	# it so it does not persist in the image.
	# NOTE(review): --no-install-recommends is intentionally not used here; the
	# flash-attn build below may depend on recommended packages — confirm before adding.
	RUN apt-get update -y && \
	    apt-get install -y git python3-pip && \
	    rm -rf /var/lib/apt/lists/*

	# Python packages, kept in their own layer so a change here does not re-run apt.
	# --no-cache-dir on every call keeps pip's download cache out of the layer.
	# vLLM is upgraded (-U) with the PyTorch cu128 wheel index as an extra index.
	# NOTE(review): all packages are unpinned, so rebuilds are not reproducible —
	# pin versions once a known-good set is established.
	RUN pip install --no-cache-dir gradio 'tritonclient[http]' && \
	    pip install -U --no-cache-dir vllm --extra-index-url https://download.pytorch.org/whl/cu128 && \
	    pip install --no-cache-dir flash-attn bitsandbytes
	# Disabled variant that additionally installs flashinfer-python:
	#pip install flash-attn flashinfer-python bitsandbytes

	# ──────────────────────── HF / GIT IDENTITY ────────────────────────
	# Create /home/triton-server (mode 755) and an empty .gitconfig inside it, both
	# owned by triton-server. Presumably this is so the unprivileged
	# `git config --global` calls below have a writable target — confirm that
	# triton-server's HOME is /home/triton-server in the base image.
	RUN install -d -m 755 -o triton-server -g triton-server /home/triton-server \
	 && touch /home/triton-server/.gitconfig \
	 && chown triton-server:triton-server /home/triton-server/.gitconfig

	# Set the global git identity while running as the service account.
	# NOTE(review): the e-mail value looks like a redaction placeholder — confirm
	# the intended address.
	USER triton-server
	RUN git config --global user.email "[email protected]" \
	 && git config --global user.name "OscarGD6"

	# ──────────────────────── HF CACHE MOUNT ────────────────────────
	# Switch back to root to create the cache directory and hand the whole
	# /opt/triton_cache tree to triton-server. Root remains the active user after
	# this section.
	USER root
	RUN mkdir -p /opt/triton_cache/hf && \
	    chown -R triton-server:triton-server /opt/triton_cache

	# Runtime environment:
	#   HF_HOME              Hugging Face cache root.
	#   TRANSFORMERS_CACHE   Deprecated by recent transformers releases in favour of
	#                        HF_HOME; kept so the existing cache location is unchanged.
	#   VLLM_USAGE_LOGGING   Kept for backward compatibility.
	#                        NOTE(review): this is not one of the opt-out variables
	#                        listed in vLLM's usage-stats documentation — confirm
	#                        whether anything (e.g. launch.sh) reads it.
	#   VLLM_NO_USAGE_STATS  Opt-out switch documented by vLLM for usage-stats
	#                        collection.
	ENV HF_HOME=/opt/triton_cache/hf \
	    TRANSFORMERS_CACHE=/opt/triton_cache/hf \
	    VLLM_USAGE_LOGGING=disable \
	    VLLM_NO_USAGE_STATS=1

	# ──────────────────────── APP FILES ────────────────────────
	# All application files live under /workspace, which also becomes the working
	# directory for the instructions that follow.
	WORKDIR /workspace

	# The model repository is a directory, so it needs an explicit destination name.
	COPY model_repository ./model_repository

	# Launcher script and app entry file go straight into the working directory.
	COPY launch.sh app.py ./

	# Make the launcher executable.
	RUN chmod +x launch.sh

	# Switch to the unprivileged service account for everything after this point.
	USER triton-server

	# ──────────────────────── PORTS ────────────────────────
	# Declares the ports the container listens on. EXPOSE is documentation only
	# and does not publish them.
	# NOTE(review): 8000/8001/8002 match Triton's default HTTP/gRPC/metrics ports
	# and 7860 matches Gradio's default; the purpose of 8008 is not visible here —
	# confirm against launch.sh.
	EXPOSE 8000 \
	       8001 \
	       8002 \
	       8008 \
	       7860

	# ──────────────────────── STARTUP ────────────────────────
	# Default command, in exec (JSON-array) form so no intermediate shell is
	# spawned. The relative path is resolved against the working directory set by
	# WORKDIR (/workspace).
	CMD ["./launch.sh"]