Spaces: Running on Zero

linhaotong committed · Commit 4845d25 · Parent(s): b4fbfcd

update

This view is limited to 50 files because it contains too many changes. See the raw diff for the full change set.
- DEPLOYMENT_CHECKLIST.md +323 -0
- GSPLAT_SOLUTIONS.md +348 -0
- HF_SPACES_BUILD.md +306 -0
- PYTHON_VERSION_CONFIG.md +290 -0
- README.md +1 -0
- SPACES_SETUP.md +190 -0
- app.py +73 -0
- example_spaces_gpu.py +52 -0
- packages.txt +3 -0
- pyproject.toml +93 -0
- requirements-basic.txt +41 -0
- requirements.txt +38 -0
- runtime.txt +2 -0
- src/depth_anything_3/__pycache__/api.cpython-311.pyc +0 -0
- src/depth_anything_3/__pycache__/cfg.cpython-311.pyc +0 -0
- src/depth_anything_3/__pycache__/cli.cpython-311.pyc +0 -0
- src/depth_anything_3/__pycache__/registry.cpython-311.pyc +0 -0
- src/depth_anything_3/__pycache__/specs.cpython-311.pyc +0 -0
- src/depth_anything_3/api.py +414 -0
- src/depth_anything_3/app/__pycache__/css_and_html.cpython-311.pyc +0 -0
- src/depth_anything_3/app/__pycache__/gradio_app.cpython-311.pyc +0 -0
- src/depth_anything_3/app/css_and_html.py +594 -0
- src/depth_anything_3/app/gradio_app.py +747 -0
- src/depth_anything_3/app/modules/__init__.py +45 -0
- src/depth_anything_3/app/modules/__pycache__/__init__.cpython-311.pyc +0 -0
- src/depth_anything_3/app/modules/__pycache__/event_handlers.cpython-311.pyc +0 -0
- src/depth_anything_3/app/modules/__pycache__/file_handlers.cpython-311.pyc +0 -0
- src/depth_anything_3/app/modules/__pycache__/model_inference.cpython-311.pyc +0 -0
- src/depth_anything_3/app/modules/__pycache__/ui_components.cpython-311.pyc +0 -0
- src/depth_anything_3/app/modules/__pycache__/utils.cpython-311.pyc +0 -0
- src/depth_anything_3/app/modules/__pycache__/visualization.cpython-311.pyc +0 -0
- src/depth_anything_3/app/modules/event_handlers.py +629 -0
- src/depth_anything_3/app/modules/file_handlers.py +304 -0
- src/depth_anything_3/app/modules/model_inference.py +286 -0
- src/depth_anything_3/app/modules/ui_components.py +474 -0
- src/depth_anything_3/app/modules/utils.py +211 -0
- src/depth_anything_3/app/modules/visualization.py +434 -0
- src/depth_anything_3/cfg.py +144 -0
- src/depth_anything_3/cli.py +742 -0
- src/depth_anything_3/configs/da3-base.yaml +45 -0
- src/depth_anything_3/configs/da3-giant.yaml +71 -0
- src/depth_anything_3/configs/da3-large.yaml +45 -0
- src/depth_anything_3/configs/da3-small.yaml +45 -0
- src/depth_anything_3/configs/da3metric-large.yaml +28 -0
- src/depth_anything_3/configs/da3mono-large.yaml +28 -0
- src/depth_anything_3/configs/da3nested-giant-large.yaml +10 -0
- src/depth_anything_3/model/__init__.py +20 -0
- src/depth_anything_3/model/__pycache__/__init__.cpython-311.pyc +0 -0
- src/depth_anything_3/model/__pycache__/cam_dec.cpython-311.pyc +0 -0
- src/depth_anything_3/model/__pycache__/cam_enc.cpython-311.pyc +0 -0
DEPLOYMENT_CHECKLIST.md ADDED
@@ -0,0 +1,323 @@
# 🚀 Hugging Face Spaces Deployment Checklist

## ✅ Current Configuration Status

### Core Files (Required)

- ✅ **app.py** - entry file with the `@spaces.GPU` decorator
- ✅ **requirements.txt** - Python dependencies (includes gsplat)
- ✅ **README.md** - Space configuration (Python 3.11)
- ✅ **packages.txt** - system dependencies (build-essential, git)
- ✅ **pyproject.toml** - project configuration

### Fallback Files (Optional)

- ✅ **requirements-basic.txt** - version without gsplat (in case the build fails)
- ✅ **runtime.txt** - fallback Python version configuration
- ✅ **GSPLAT_SOLUTIONS.md** - solutions for gsplat issues
- ✅ **SPACES_SETUP.md** - detailed deployment guide

---

## 📋 Pre-Deployment Checks

### 1. File Check

```bash
# Confirm all required files exist
[ -f app.py ] && echo "✅ app.py" || echo "❌ app.py missing"
[ -f requirements.txt ] && echo "✅ requirements.txt" || echo "❌ requirements.txt missing"
[ -f README.md ] && echo "✅ README.md" || echo "❌ README.md missing"
[ -d src/depth_anything_3 ] && echo "✅ Source code" || echo "❌ Source code missing"
```

### 2. Configuration Check

**README.md must contain:**
```yaml
---
sdk: gradio
app_file: app.py
python_version: 3.11
---
```

**requirements.txt must contain:**
```txt
torch>=2.0.0
gradio>=5.0.0
spaces
gsplat @ git+https://...  # if 3DGS is needed
```

**app.py must contain:**
```python
import spaces
@spaces.GPU(duration=120)
def gpu_run_inference(self, *args, **kwargs):
    ...
```

### 3. Local Testing (Recommended)

```bash
# Check the Python version
python --version  # should be 3.11+

# Test installing the dependencies
pip install -r requirements.txt

# Test that the app starts
python app.py

# Test gsplat (if needed)
python -c "import gsplat; print('✅ gsplat OK')"
```
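If you prefer a single command over the shell checks above, the same checks can be folded into one script. A minimal sketch (the file name `preflight_check.py` is hypothetical, not part of the repo):

```python
# preflight_check.py - hypothetical helper mirroring the shell checks above
import importlib.util
import os
import sys

def preflight() -> bool:
    ok = True
    for path in ("app.py", "requirements.txt", "README.md"):
        if not os.path.isfile(path):
            print(f"❌ {path} missing")
            ok = False
    if not os.path.isdir("src/depth_anything_3"):
        print("❌ Source code missing")
        ok = False
    if sys.version_info < (3, 11):  # should match python_version in README.md
        print(f"❌ Python {sys.version.split()[0]} < 3.11")
        ok = False
    for mod in ("torch", "gradio", "spaces"):
        # find_spec detects installed packages without importing heavy modules
        if importlib.util.find_spec(mod) is None:
            print(f"❌ module '{mod}' not installed")
            ok = False
    return ok

if __name__ == "__main__":
    sys.exit(0 if preflight() else 1)
```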

---

## 🎯 Deployment Steps

### Option A: Via the Web Interface

1. **Create a Space**
   - Visit https://huggingface.co/new-space
   - Space name: enter a name
   - SDK: choose **Gradio**
   - Hardware: choose **GPU (T4 or higher)**
   - Visibility: Public/Private

2. **Upload the files**
   - Upload everything (app.py, requirements.txt, src/, etc.)
   - Or push via a Git clone

3. **Wait for the build**
   - Watch the "Build logs" tab
   - The first build may take 10-20 minutes (because of gsplat)

4. **Test the app**
   - It starts automatically after a successful build
   - Test all features

### Option B: Via Git

```bash
# 1. Create the Space (via the web interface)

# 2. Clone the Space repository
git clone https://huggingface.co/spaces/YOUR_USERNAME/YOUR_SPACE_NAME
cd YOUR_SPACE_NAME

# 3. Copy the files
cp -r /path/to/depth-anything-3/* .

# 4. Commit and push
git add .
git commit -m "Initial deployment"
git push

# 5. Check the build logs
# (in the web interface)
```

---

## 🐛 Quick Fixes for Common Issues

### Issue 1: gsplat build failure ⚠️

**Symptom:**
```
Building wheel for gsplat (setup.py) ... error
```

**Quick fix:**
```bash
# Option 1: switch to the version without gsplat
mv requirements.txt requirements-full.txt
mv requirements-basic.txt requirements.txt
git commit -am "Use basic requirements without gsplat"
git push
```

**Or in the web interface:**
1. Open requirements.txt
2. Comment out the gsplat line: `# gsplat @ git+...`
3. Commit the change

See `GSPLAT_SOLUTIONS.md` for details.

### Issue 2: Build timeout

**Symptom:**
```
Build timeout after 60 minutes
```

**Fix:**
1. Use requirements-basic.txt (without gsplat)
2. Or contact HF support to raise the build time limit

### Issue 3: App fails to start

**Symptom:**
```
ModuleNotFoundError: No module named 'depth_anything_3'
```

**Fix:**
1. Confirm the `src/` directory structure is correct
2. Add at the top of app.py:
```python
import sys
sys.path.append('./src')
```

### Issue 4: GPU unavailable

**Symptom:**
```
torch.cuda.is_available() = False
```

**Fix:**
1. Confirm the Space hardware is set to **GPU** (not CPU)
2. Switch to GPU hardware in Settings
3. A paid GPU may be required (T4 is the cheapest)

---

## 📊 Estimated Build Times

| Configuration | First build | Later builds | Startup time |
|------|---------|---------|---------|
| With gsplat | 15-25 min | 2-5 min* | 30-60 s |
| Without gsplat | 5-10 min | 1-2 min* | 20-40 s |

*Later builds may hit the cache.

---

## 🎓 Post-Deployment Test Checklist

### Basics

- [ ] App starts successfully
- [ ] Space URL is reachable
- [ ] UI renders correctly
- [ ] Images/videos can be uploaded

### Depth estimation

- [ ] Depth estimation runs
- [ ] Results display correctly
- [ ] Point cloud visualization works
- [ ] Camera poses display correctly

### 3DGS features (if gsplat is enabled)

- [ ] 3DGS options are visible
- [ ] 3DGS videos can be generated
- [ ] The videos play back

### Performance

- [ ] GPU is detected correctly
- [ ] Inference speed is reasonable (no timeouts)
- [ ] Memory usage is normal

---

## 💾 Configuration Quick Reference

### README.md
```yaml
---
title: Depth Anything 3
sdk: gradio
sdk_version: 5.49.1
app_file: app.py
python_version: 3.11
---
```

### Key parts of app.py
```python
import spaces
from depth_anything_3.app.gradio_app import DepthAnything3App

original_run_inference = ModelInference.run_inference

@spaces.GPU(duration=120)
def gpu_run_inference(self, *args, **kwargs):
    return original_run_inference(self, *args, **kwargs)

ModelInference.run_inference = gpu_run_inference

if __name__ == "__main__":
    app = DepthAnything3App(...)
    app.launch(host="0.0.0.0", port=7860)
```

### Key dependencies in requirements.txt
```txt
torch>=2.0.0
gradio>=5.0.0
spaces
gsplat @ git+https://github.com/nerfstudio-project/gsplat.git@0b4dddf04cb687367602c01196913cde6a743d70
```

### packages.txt
```txt
build-essential
git
```

---

## 🔗 Related Documentation

Detailed docs in this project:

1. **SPACES_SETUP.md** - full deployment guide and how Spaces works
2. **GSPLAT_SOLUTIONS.md** - ways to get gsplat installed
3. **HF_SPACES_BUILD.md** - the HF Spaces build pipeline in detail
4. **PYTHON_VERSION_CONFIG.md** - Python version configuration

External resources:

- [HF Spaces documentation](https://huggingface.co/docs/hub/spaces)
- [Gradio documentation](https://gradio.app/docs)
- [gsplat GitHub](https://github.com/nerfstudio-project/gsplat)

---

## 📞 Getting Help

If you run into problems:

1. **Check the build logs** - the "Build logs" tab on the Space page
2. **Check the runtime logs** - the "Logs" tab on the Space page
3. **Read the docs** - this project's *.md files
4. **HF forums** - https://discuss.huggingface.co/
5. **GitHub Issues** - the project's Issues page

---

## ✨ After a Successful Deployment

Congratulations! 🎉 Your Depth Anything 3 app is now running on HF Spaces!

**Next steps:**

1. 📝 Update README.md with usage instructions
2. 🎨 Customize the UI (if needed)
3. 📊 Monitor usage
4. 🔄 Iterate on feedback

**Share your Space:**
- Space URL: `https://huggingface.co/spaces/YOUR_USERNAME/YOUR_SPACE_NAME`
- It can be embedded in web pages, blogs, etc.

Enjoy! 🚀
GSPLAT_SOLUTIONS.md ADDED
@@ -0,0 +1,348 @@
# gsplat Installation Solutions

## 🎯 Problem Description

`gsplat` is a CUDA-accelerated 3D Gaussian Splatting library; installing it from source can run into problems on HF Spaces.

## ✅ Solutions (in recommended order)

---

## Solution 1️⃣: Install Directly from GitHub ⭐ (currently configured)

**requirements.txt:**
```txt
gsplat @ git+https://github.com/nerfstudio-project/gsplat.git@0b4dddf04cb687367602c01196913cde6a743d70
```

**Pros:**
- ✅ Pinned to a specific commit, stable
- ✅ Latest features
- ✅ Compatible with your code

**Cons:**
- ⚠️ Long build time (5-15 minutes)
- ⚠️ Needs CUDA at build time
- ⚠️ The build may fail

**How to test:**
```bash
# Local test (make sure a GPU is present)
pip install 'gsplat @ git+https://github.com/nerfstudio-project/gsplat.git@0b4dddf04cb687367602c01196913cde6a743d70'
python -c "import gsplat; print(gsplat.__version__)"
```

**HF Spaces configuration advice:**

If the build fails, in the Space settings:
1. Select a **GPU Space** (not a CPU Space)
2. Choose at least a **T4** GPU or higher
3. The GPU is needed already at the build stage

---

## Solution 2️⃣: Use a Precompiled Wheel (if available)

**Check whether a precompiled version exists:**
```bash
pip index versions gsplat
```

If a PyPI release exists, change requirements.txt:
```txt
# Use the PyPI version (faster)
gsplat>=0.1.0
```

**Pros:**
- ✅ Fast install (seconds)
- ✅ No compilation needed
- ✅ More stable

**Cons:**
- ⚠️ May be an older version
- ⚠️ A precompiled version may not exist
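The `pip index versions` check above can also be done from Python against PyPI's public JSON endpoint; a minimal sketch (assumes network access):

```python
# List gsplat releases published on PyPI (wheels there skip local compilation)
import json
import urllib.request

with urllib.request.urlopen("https://pypi.org/pypi/gsplat/json") as resp:
    data = json.load(resp)

# Releases with at least one uploaded file are actually installable
versions = [v for v, files in data["releases"].items() if files]
print("published versions:", sorted(versions))
```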

---

## Solution 3️⃣: Lazy-Load gsplat (recommended fallback) ⭐

If the build fails, change the code so gsplat becomes an optional dependency:

### Step 1: Split requirements.txt

Create two files:

**requirements.txt** (base dependencies):
```txt
torch>=2.0.0
gradio>=5.0.0
spaces
# ... other base dependencies
```

**requirements-gsplat.txt** (optional dependencies):
```txt
-r requirements.txt
gsplat @ git+https://github.com/nerfstudio-project/gsplat.git@0b4dddf04cb687367602c01196913cde6a743d70
```

### Step 2: Make gsplat optional in the code

**depth_anything_3/utils/export/gs.py** (or the relevant file):
```python
# At the top of the file
try:
    import gsplat
    GSPLAT_AVAILABLE = True
except ImportError:
    GSPLAT_AVAILABLE = False
    print("⚠️ gsplat not installed. 3DGS features will be disabled.")

def export_to_gs_video(*args, **kwargs):
    if not GSPLAT_AVAILABLE:
        raise RuntimeError(
            "gsplat is not installed. Please install it with:\n"
            "pip install 'gsplat @ git+https://github.com/...'"
        )
    # original code...
```

**app.py** (or gradio_app.py):
```python
from depth_anything_3.utils.export.gs import GSPLAT_AVAILABLE

# Hide the 3DGS option in the UI if unavailable
if GSPLAT_AVAILABLE:
    infer_gs = gr.Checkbox(label="Infer 3D Gaussian Splatting")
else:
    infer_gs = gr.Checkbox(
        label="Infer 3D Gaussian Splatting (Not Available - gsplat not installed)",
        interactive=False,
        value=False
    )
```

**Pros:**
- ✅ The app can still start
- ✅ Other features keep working
- ✅ Users can install gsplat selectively

**Cons:**
- ⚠️ Requires code changes
- ⚠️ 3DGS features are unavailable

---

## Solution 4️⃣: Custom Docker Build

Create a custom Docker image and precompile gsplat:

**Dockerfile:**
```dockerfile
FROM pytorch/pytorch:2.1.0-cuda11.8-cudnn8-runtime

WORKDIR /app

# Install build dependencies
RUN apt-get update && apt-get install -y \
    git \
    build-essential \
    && rm -rf /var/lib/apt/lists/*

# Precompile gsplat
RUN pip install 'gsplat @ git+https://github.com/nerfstudio-project/gsplat.git@0b4dddf04cb687367602c01196913cde6a743d70'

# Install the remaining dependencies
COPY requirements.txt .
RUN pip install -r requirements.txt

# Copy the code
COPY . .

CMD ["python", "app.py"]
```

**Pros:**
- ✅ Full control over the build environment
- ✅ Compilation results can be cached
- ✅ More reliable

**Cons:**
- ⚠️ Requires Docker knowledge
- ⚠️ Large image
- ⚠️ Long build and push times

---

## Solution 5️⃣: Control the Install via an Environment Variable

**requirements.txt:**
```txt
torch>=2.0.0
gradio>=5.0.0
# base dependencies...
```

**Install script** (install_gsplat.sh):
```bash
#!/bin/bash
if [ "$INSTALL_GSPLAT" = "true" ]; then
    echo "Installing gsplat..."
    pip install 'gsplat @ git+https://github.com/nerfstudio-project/gsplat.git@0b4dddf04cb687367602c01196913cde6a743d70'
else
    echo "Skipping gsplat installation"
fi
```

Add the environment variable in the HF Spaces settings:
```
INSTALL_GSPLAT=true
```

**Pros:**
- ✅ Flexible
- ✅ Quick to toggle

**Cons:**
- ⚠️ Requires an extra script
- ⚠️ Not a standard mechanism

---

## 🔧 Currently Recommended Configuration

### First attempt: Solution 1 (already configured) ✅

**requirements.txt:**
```txt
gsplat @ git+https://github.com/nerfstudio-project/gsplat.git@0b4dddf04cb687367602c01196913cde6a743d70
```

**Space settings:**
- Hardware: **GPU (T4 or higher)**
- Python version: 3.11

### If the build fails: Solution 3 (lazy loading)

Remove gsplat from requirements.txt and make it optional in the code.

---

## 🐛 Troubleshooting

### Problem 1: Build timeout

**Error message:**
```
Building wheels for collected packages: gsplat
  Building wheel for gsplat (setup.py) ... [TIMEOUT]
```

**Fix:**
1. Confirm the Space type is a **GPU Space**
2. Try a faster commit/tag
3. Consider Solution 3 (optional dependency)

### Problem 2: CUDA unavailable

**Error message:**
```
torch.cuda.is_available() returned False
CUDA extension build requires CUDA to be available
```

**Fix:**
1. Confirm the GPU is enabled already at build time
2. Check whether PyTorch is a CUDA build (see the sketch below)
3. See the [HF Spaces GPU docs](https://huggingface.co/docs/hub/spaces-gpus)
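For point 2, a quick diagnostic sketch that distinguishes a CPU-only PyTorch wheel from a CUDA build with no visible device:

```python
import torch

print("torch:", torch.__version__)
# None here means a CPU-only wheel was installed - reinstall a CUDA build
print("compiled against CUDA:", torch.version.cuda)
# False with a non-None value above means the build is fine but no GPU is visible
print("runtime CUDA available:", torch.cuda.is_available())
if torch.cuda.is_available():
    print("device:", torch.cuda.get_device_name(0))
```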
+
|
| 260 |
+
### 问题 3:编译错误
|
| 261 |
+
|
| 262 |
+
**错误信息:**
|
| 263 |
+
```
|
| 264 |
+
error: command 'gcc' failed with exit status 1
|
| 265 |
+
```
|
| 266 |
+
|
| 267 |
+
**解决方法:**
|
| 268 |
+
1. 添加 packages.txt 安装编译工具:
|
| 269 |
+
```txt
|
| 270 |
+
build-essential
|
| 271 |
+
```
|
| 272 |
+
2. 使用预编译版本
|
| 273 |
+
|
| 274 |
+
---
|
| 275 |
+
|
| 276 |
+
## 📊 方案对比
|
| 277 |
+
|
| 278 |
+
| 方案 | 构建时间 | 成功率 | 复杂度 | 推荐度 |
|
| 279 |
+
|------|---------|--------|--------|--------|
|
| 280 |
+
| 1. GitHub 直接安装 | 🐌 10-15分钟 | ⚠️ 70% | 简单 | ⭐⭐⭐ |
|
| 281 |
+
| 2. PyPI 预编译 | ⚡ 1分钟 | ✅ 95% | 最简单 | ⭐⭐⭐⭐⭐ |
|
| 282 |
+
| 3. 可选依赖 | ⚡ 2分钟 | ✅ 100% | 中等 | ⭐⭐⭐⭐ |
|
| 283 |
+
| 4. Docker | 🐌 20-30分钟 | ✅ 95% | 复杂 | ⭐⭐ |
|
| 284 |
+
| 5. 环境变量控制 | 🐌 10-15分钟 | ⚠️ 70% | 中等 | ⭐⭐ |
|
| 285 |
+
|
| 286 |
+
---
|
| 287 |
+
|
| 288 |
+
## 🎯 实施步骤
|
| 289 |
+
|
| 290 |
+
### 现在(已完成)✅
|
| 291 |
+
|
| 292 |
+
1. ✅ requirements.txt 中已启用 gsplat
|
| 293 |
+
2. ✅ Python 版本设置为 3.11
|
| 294 |
+
3. ✅ README.md 配置完成
|
| 295 |
+
|
| 296 |
+
### 推送到 HF Spaces 后
|
| 297 |
+
|
| 298 |
+
1. **观察构建日志**
|
| 299 |
+
- 查看是否成功安装 gsplat
|
| 300 |
+
- 构建时间是否合理
|
| 301 |
+
|
| 302 |
+
2. **如果构建成功** 🎉
|
| 303 |
+
- 测试 3DGS 功能
|
| 304 |
+
- 完成!
|
| 305 |
+
|
| 306 |
+
3. **如果构建失败** ⚠️
|
| 307 |
+
- 复制错误信息
|
| 308 |
+
- 根据上面的故障排除指南修复
|
| 309 |
+
- 或者切换到方案 3(可选依赖)
|
| 310 |
+
|
| 311 |
+
---
|
| 312 |
+
|
| 313 |
+
## 📝 测试清单
|
| 314 |
+
|
| 315 |
+
部署前本地测试:
|
| 316 |
+
|
| 317 |
+
```bash
|
| 318 |
+
# 1. 测试 gsplat 安装
|
| 319 |
+
pip install 'gsplat @ git+https://github.com/nerfstudio-project/gsplat.git@0b4dddf04cb687367602c01196913cde6a743d70'
|
| 320 |
+
|
| 321 |
+
# 2. 测试导入
|
| 322 |
+
python -c "import gsplat; print('gsplat version:', gsplat.__version__)"
|
| 323 |
+
|
| 324 |
+
# 3. 测试你的代码
|
| 325 |
+
python -c "from depth_anything_3.utils.export.gs import export_to_gs_video; print('✅ import success')"
|
| 326 |
+
|
| 327 |
+
# 4. 启动应用测试
|
| 328 |
+
python app.py
|
| 329 |
+
```
|
| 330 |
+
|
| 331 |
+
---
|
| 332 |
+
|
| 333 |
+
## 🔗 相关资源
|
| 334 |
+
|
| 335 |
+
- [gsplat GitHub](https://github.com/nerfstudio-project/gsplat)
|
| 336 |
+
- [HF Spaces GPU 文档](https://huggingface.co/docs/hub/spaces-gpus)
|
| 337 |
+
- [PyTorch CUDA 安装](https://pytorch.org/get-started/locally/)
|
| 338 |
+
|
| 339 |
+
---
|
| 340 |
+
|
| 341 |
+
## 💡 最终建议
|
| 342 |
+
|
| 343 |
+
1. **先尝试方案 1**(当前配置)- 直接在 HF Spaces 上构建
|
| 344 |
+
2. **如果失败**,切换到**方案 3**(可选依赖)- 让应用可以在没有 gsplat 的情况下运行
|
| 345 |
+
3. **长期方案**:如果 gsplat 发布 PyPI 版本,立即切换到方案 2
|
| 346 |
+
|
| 347 |
+
祝你部署顺利!🚀
|
| 348 |
+
|
HF_SPACES_BUILD.md ADDED
@@ -0,0 +1,306 @@
# Hugging Face Spaces Build and Environment Setup, in Detail

## 🏗️ Build Flow Overview

```mermaid
graph TD
    A[Push code to the Space] --> B[Detect SDK type]
    B --> C[Read README.md configuration]
    C --> D[Look for dependency files]
    D --> E{Dependency file type}
    E -->|requirements.txt| F[pip install -r requirements.txt]
    E -->|pyproject.toml| G[pip install -e .]
    E -->|packages.txt| H[apt-get install]
    F --> I[Start the app]
    G --> I
    H --> I
    I --> J[Run app.py]
```

## 📋 Step by Step

### Step 1: Space configuration detection

HF Spaces reads the YAML front matter of `README.md`:

```yaml
---
title: Depth Anything 3
emoji: 🏢
colorFrom: indigo
colorTo: pink
sdk: gradio          # 🔑 Key: use the Gradio SDK
sdk_version: 5.49.1  # Gradio version
app_file: app.py     # 🔑 Key: entry file
pinned: false
license: cc-by-nc-4.0
---
```

### Step 2: Dependency file precedence

HF Spaces looks for dependency files in this order (the first one found wins):

#### 1. `requirements.txt` ⭐ (most recommended)

```txt
torch>=2.0.0
gradio>=5.0.0
spaces
numpy<2
```

**Install command:**
```bash
pip install -r requirements.txt
```

**Pros:**
- ✅ Simple and direct
- ✅ Fast builds
- ✅ Best compatibility
- ✅ Clear error messages

#### 2. `pyproject.toml` (what you currently use)

```toml
[project]
dependencies = ["torch>=2", "numpy<2"]

[project.optional-dependencies]
app = ["gradio>=5", "spaces"]
```

**Install command:**
```bash
pip install -e .
# or, including the optional dependencies
pip install -e ".[app]"
```

**Problems:**
- ⚠️ `[project.optional-dependencies]` may not be installed automatically
- ⚠️ Requires a correct package structure (`src/` layout, etc.)
- ⚠️ Longer build times

#### 3. `packages.txt` (system-level dependencies)

```txt
ffmpeg
libsm6
libxext6
```

**Install command:**
```bash
apt-get update
apt-get install -y ffmpeg libsm6 libxext6
```

**Use it for:**
- System-level libraries (non-Python packages)
- System libraries that OpenCV may need
- Audio/video processing tools

### Step 3: What the build actually runs

```bash
# === Commands HF Spaces runs internally (simplified) ===

# 1. Prepare the environment
export HOME=/home/user
export PYTHONPATH=/home/user/app:$PYTHONPATH

# 2. Set up the base Python environment
python -m pip install --upgrade pip setuptools wheel

# 3. Install system dependencies (if packages.txt exists)
if [ -f packages.txt ]; then
    apt-get update
    xargs -a packages.txt apt-get install -y
fi

# 4. Install Python dependencies
if [ -f requirements.txt ]; then
    pip install -r requirements.txt
elif [ -f pyproject.toml ]; then
    pip install -e .
fi

# 5. Start the app
python app.py
```

## 🔍 Build Analysis for This Project

### Current issue: using pyproject.toml

Your `pyproject.toml` configuration:

```toml
[project]
dependencies = [
    "torch>=2",
    # ❌ no gradio here!
    # ...
]

[project.optional-dependencies]
app = ["gradio>=5", "spaces"]  # ✅ gradio is here
```

**Problem:**
- HF Spaces may install only `dependencies`, not `optional-dependencies`
- So `gradio` and `spaces` may never be installed (a fail-fast check is sketched below)
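A minimal sketch of a fail-fast check for that risk: verify at startup that the `[app]` extras actually made it into the environment (the error text is illustrative):

```python
import importlib.util

missing = [m for m in ("gradio", "spaces") if importlib.util.find_spec(m) is None]
if missing:
    raise ImportError(
        f"missing {missing}; install with: pip install -e '.[app]' "
        "or move these packages into [project].dependencies"
    )
```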

### Fix 1: Use requirements.txt (recommended) ✅

A `requirements.txt` has already been created for you; HF Spaces will use it first:

```bash
# Spaces runs this automatically
pip install -r requirements.txt
```

### Fix 2: Modify pyproject.toml

Move gradio into the main dependencies:

```toml
[project]
dependencies = [
    "torch>=2",
    "gradio>=5",
    "spaces",
    # ... other dependencies
]
```

### Fix 3: Create a .spacesrc

Create a `.spacesrc` file to customize the build:

```bash
pip install -e ".[app,gs]"
```

## 🚀 Recommended Layout

Recommended file structure for an HF Spaces deployment:

```
depth-anything-3/
├── app.py              # entry file
├── requirements.txt    # Python dependencies (takes precedence)
├── packages.txt        # system dependencies (optional)
├── README.md           # Space configuration
├── src/
│   └── depth_anything_3/
│       └── ...
└── pyproject.toml      # project configuration (fallback)
```

## ⚡ Build Optimization Tips

### 1. Pin version numbers

```txt
# ❌ Not recommended (unstable builds)
torch>=2
gradio>=5

# ✅ Recommended (stable builds)
torch==2.1.0
gradio==5.49.1
```

### 2. Prebuilt wheels

Use versions with prebuilt wheels on PyPI to avoid compiling from source:

```txt
# ✅ Fast install
torch==2.1.0
torchvision==0.16.0

# ⚠️ Slow (compiled from source)
gsplat @ git+https://github.com/...
```

### 3. Use Docker (advanced)

Create a custom Docker image:

```dockerfile
FROM python:3.10
WORKDIR /app
COPY requirements.txt .
RUN pip install -r requirements.txt
COPY . .
CMD ["python", "app.py"]
```

## 🐛 Common Questions

### Q1: Why did the build fail?

**Checklist:**
1. ✅ Does the dependency file exist?
2. ✅ Are the version numbers compatible?
3. ✅ Are system dependencies needed (packages.txt)?
4. ✅ Are the package names correct?

### Q2: How do I view the build logs?

On the Space page:
1. Click "Settings" in the top right
2. Scroll to "Build logs"
3. Read the detailed log

### Q3: What if the build takes too long?

**Optimizations:**
1. Use `requirements.txt` instead of `pyproject.toml`
2. Remove unnecessary dependencies
3. Use prebuilt wheels
4. Consider Docker image caching

### Q4: It runs locally but fails on Spaces?

**Possible causes:**
1. Missing system dependencies (packages.txt needed)
2. Path issues (absolute paths locally)
3. Different environment variables
4. Different Python version

**Fix:**
```yaml
# Specify the Python version in README.md
---
sdk: gradio
python_version: 3.10
---
```
|
| 285 |
+
## 📊 构建时间参考
|
| 286 |
+
|
| 287 |
+
| 依赖方式 | 平均构建时间 | 稳定性 |
|
| 288 |
+
|---------|------------|--------|
|
| 289 |
+
| requirements.txt | 2-5 分钟 | ⭐⭐⭐⭐⭐ |
|
| 290 |
+
| pyproject.toml | 5-10 分钟 | ⭐⭐⭐ |
|
| 291 |
+
| 从源码编译 | 10-30 分钟 | ⭐⭐ |
|
| 292 |
+
|
| 293 |
+
## 🎯 最佳实践
|
| 294 |
+
|
| 295 |
+
1. **使用 requirements.txt** 作为主要依赖管理
|
| 296 |
+
2. **固定关键依赖的版本号**
|
| 297 |
+
3. **测试本地环境** 使用 `pip install -r requirements.txt`
|
| 298 |
+
4. **监控构建日志** 及时发现问题
|
| 299 |
+
5. **逐步添加依赖** 一个一个测试,而不是一次性全加
|
| 300 |
+
|
| 301 |
+
## 🔗 相关资源
|
| 302 |
+
|
| 303 |
+
- [HF Spaces 文档](https://huggingface.co/docs/hub/spaces)
|
| 304 |
+
- [Gradio Spaces 指南](https://huggingface.co/docs/hub/spaces-sdks-gradio)
|
| 305 |
+
- [依赖管理](https://huggingface.co/docs/hub/spaces-dependencies)
|
| 306 |
+
|
PYTHON_VERSION_CONFIG.md ADDED
@@ -0,0 +1,290 @@
# Python Version Configuration

## 📋 Where the Python Version Is Configured

### ✅ The 3 places already configured for you:

---

## 1️⃣ README.md (Hugging Face Spaces) ⭐ **most important**

```yaml
---
title: Depth Anything 3
sdk: gradio
sdk_version: 5.49.1
app_file: app.py
python_version: 3.11  # 🔑 key setting
---
```

**Scope:** Hugging Face Spaces deployment
**Priority:** 🔥 Highest (Spaces-specific)

**Supported versions:**
- `3.8`
- `3.9`
- `3.10`
- `3.11` ✅ (your choice)
- `3.12` (newer; may have compatibility issues)

**Notes:**
- This is the only configuration HF Spaces recognizes
- If unspecified, the default is `3.10`
- Must be an exact version number (e.g. `3.11`), not a range (e.g. `>=3.11`)

---

## 2️⃣ pyproject.toml (project configuration)

```toml
[project]
requires-python = ">=3.11"  # ✅ configured
```

**Scope:**
- Local development
- Version check at pip install time
- Package managers (poetry, hatch, etc.)

**Priority:** Medium

**Supported formats:**
```toml
requires-python = ">=3.11"         # at least 3.11
requires-python = ">=3.11, <3.13"  # 3.11 through 3.12
requires-python = "~=3.11"         # the 3.11.x series
```

**Effect:**
```bash
# If the Python version does not satisfy the requirement, the install fails
$ pip install .
ERROR: Package requires a different Python: 3.9.0 not in '>=3.11'
```

---

## 3️⃣ runtime.txt (fallback)

```txt
python-3.11
```

**Scope:**
- Heroku
- Some Docker build systems
- HF Spaces (fallback, if README.md has no configuration)

**Priority:** Low

**Format:**
```txt
python-3.11    # ✅ exact version
python-3.11.5  # ✅ more precise version
```

---

## 🎯 Configuration Precedence (Hugging Face Spaces)

```
README.md (python_version)
    ↓ highest priority
runtime.txt
    ↓ secondary
default version (3.10)
    ↓ fallback
```

**Best practice:** configure both `README.md` and `pyproject.toml`.
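Beyond the static configuration, the requirement can also be enforced at runtime; a minimal guard sketch (hypothetical, not currently in app.py) that fails fast if a build ever falls back to the 3.10 default:

```python
import sys

# Abort early with a clear message instead of a confusing crash later
if sys.version_info < (3, 11):
    raise RuntimeError(
        f"Python {sys.version.split()[0]} detected; this app expects >= 3.11 "
        "(see python_version in README.md)"
    )
```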

---

## 🔍 How to Verify the Configuration Took Effect

### On Hugging Face Spaces:

After deploying, check the build log:

```bash
# The log will show
Setting up Python 3.11...
Python 3.11.5
pip 23.2.1
```

### Locally:

```bash
# Check the Python version
python --version
# Python 3.11.5

# Try installing (checks requires-python)
pip install -e .
# Fails with an error if the version does not match
```

---

## 🚨 Common Questions

### Q1: Why Python 3.11?

**Pros:**
- ✅ Performance gains (10-60% faster than 3.10)
- ✅ Better error messages
- ✅ New typing features
- ✅ Fully supported by Gradio 5+

**Caveats:**
- ⚠️ Some older libraries may not support it (e.g. gsplat)
- ⚠️ All dependencies need compatibility testing

### Q2: What if I want to support multiple versions?

**pyproject.toml:**
```toml
requires-python = ">=3.11, <3.13"  # supports 3.11 and 3.12
```

**But HF Spaces can only pick one:**
```yaml
python_version: 3.11  # only one exact version
```

### Q3: How do I test different Python versions?

**With pyenv:**
```bash
# Install multiple Python versions
pyenv install 3.11.5
pyenv install 3.12.0

# Switch versions to test
pyenv local 3.11.5
python --version
pip install -e .
python app.py
```

**With Docker:**
```dockerfile
FROM python:3.11
WORKDIR /app
COPY . .
RUN pip install -r requirements.txt
CMD ["python", "app.py"]
```

### Q4: What about version conflicts?

**Scenario:** a dependency does not support Python 3.11

**Fixes:**

1. **Find a replacement package**
```txt
# requirements.txt
old-package  # does not support 3.11
↓
new-package  # supports 3.11
```

2. **Downgrade the Python version**
```yaml
python_version: 3.10  # back to 3.10
```

3. **Wait for an upstream update**
```bash
pip install git+https://github.com/xxx/package@main
```

---

## 📊 Python Version Compatibility Reference

| Python version | Gradio 5 | PyTorch 2.x | Spaces support | Recommended |
|------------|----------|-------------|------------|------|
| 3.8 | ✅ | ✅ | ✅ | ❌ (too old) |
| 3.9 | ✅ | ✅ | ✅ | ⚠️ |
| 3.10 | ✅ | ✅ | ✅ | ✅ |
| 3.11 | ✅ | ✅ | ✅ | ⭐ recommended |
| 3.12 | ✅ | ⚠️ | ✅ | ⚠️ (newer) |
| 3.13 | ⚠️ | ❌ | ⚠️ | ❌ (too new) |

---

## 🎓 Complete Configuration Examples

### Your current configuration (done) ✅

**README.md:**
```yaml
---
python_version: 3.11
---
```

**pyproject.toml:**
```toml
requires-python = ">=3.11"
```

**runtime.txt:**
```txt
python-3.11
```

### To downgrade to 3.10:

**README.md:**
```yaml
python_version: 3.10
```

**pyproject.toml:**
```toml
requires-python = ">=3.10"
```

**runtime.txt:**
```txt
python-3.10
```

---

## 🔧 Test Checklist

Check before deploying:

- [ ] ✅ README.md has `python_version: 3.11`
- [ ] ✅ pyproject.toml has `requires-python = ">=3.11"`
- [ ] ✅ Local testing uses Python 3.11
- [ ] ✅ All dependencies support Python 3.11
- [ ] ✅ requirements.txt lists all dependencies
- [ ] ✅ app.py starts correctly

---

## 📚 References

- [HF Spaces python_version docs](https://huggingface.co/docs/hub/spaces-config-reference#python_version)
- [Python release schedule](https://devguide.python.org/versions/)
- [PyPI package compatibility lookup](https://pypi.org/)

---

## 💡 Summary

**For a Hugging Face Spaces deployment:**

1. **Required:** `python_version: 3.11` in `README.md`
2. **Recommended:** `requires-python = ">=3.11"` in `pyproject.toml`
3. **Optional:** `runtime.txt` (fallback)

**Current status:** ✅ Everything configured!
README.md CHANGED
@@ -6,6 +6,7 @@ colorTo: pink
 sdk: gradio
 sdk_version: 5.49.1
 app_file: app.py
+python_version: 3.11
 pinned: false
 license: cc-by-nc-4.0
 ---
SPACES_SETUP.md ADDED
@@ -0,0 +1,190 @@
# Hugging Face Spaces Deployment Guide

## 📋 Overview

This project is already configured for deployment to Hugging Face Spaces, using the `@spaces.GPU` decorator to allocate GPU resources dynamically.

## 🎯 Key Files

### 1. `app.py` - main application file

```python
import spaces
from depth_anything_3.app.gradio_app import DepthAnything3App
from depth_anything_3.app.modules.model_inference import ModelInference

# Apply the GPU decorator to the inference function via monkey-patching
original_run_inference = ModelInference.run_inference

@spaces.GPU(duration=120)  # request a GPU for at most 120 seconds
def gpu_run_inference(self, *args, **kwargs):
    return original_run_inference(self, *args, **kwargs)

ModelInference.run_inference = gpu_run_inference
```

**How it works:**
- The `@spaces.GPU` decorator allocates a GPU dynamically when the function is called
- `duration=120` means a single inference may hold the GPU for at most 120 seconds
- Monkey-patching lets us apply the decorator to the existing inference function without touching the core code
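Note that `app.py` as written imports `spaces` at the top, so on a machine without that package you would need a shim. A minimal sketch (an assumption, not part of the repo) that keeps the file importable by degrading the decorator to a no-op:

```python
try:
    import spaces
except ImportError:
    class spaces:  # local stand-in with the same call shape as the real package
        @staticmethod
        def GPU(duration=60):
            def decorator(fn):
                return fn  # outside Spaces, just run on whatever device exists
            return decorator
```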
+
|
| 31 |
+
### 2. `README.md` - Spaces 配置
|
| 32 |
+
|
| 33 |
+
```yaml
|
| 34 |
+
---
|
| 35 |
+
title: Depth Anything 3
|
| 36 |
+
sdk: gradio
|
| 37 |
+
sdk_version: 5.49.1
|
| 38 |
+
app_file: app.py
|
| 39 |
+
pinned: false
|
| 40 |
+
license: cc-by-nc-4.0
|
| 41 |
+
---
|
| 42 |
+
```
|
| 43 |
+
|
| 44 |
+
这个 YAML 前置内容告诉 Hugging Face Spaces:
|
| 45 |
+
- 使用 Gradio SDK
|
| 46 |
+
- 入口文件是 `app.py`
|
| 47 |
+
- 使用的 Gradio 版本
|
| 48 |
+
|
| 49 |
+
### 3. `pyproject.toml` - 依赖配置
|
| 50 |
+
|
| 51 |
+
已经更新,包含了 `spaces` 依赖:
|
| 52 |
+
|
| 53 |
+
```toml
|
| 54 |
+
[project.optional-dependencies]
|
| 55 |
+
app = ["gradio>=5", "pillow>=9.0", "spaces"]
|
| 56 |
+
```
|
| 57 |
+
|
| 58 |
+
## 🚀 部署步骤
|
| 59 |
+
|
| 60 |
+
### 方式 1:通过 Hugging Face 网页界面
|
| 61 |
+
|
| 62 |
+
1. 在 Hugging Face 创建一个新的 Space
|
| 63 |
+
2. 选择 **Gradio** 作为 SDK
|
| 64 |
+
3. 上传你的代码(包括 `app.py`, `src/`, `pyproject.toml` 等)
|
| 65 |
+
4. Space 会自动构建并启动
|
| 66 |
+
|
| 67 |
+
### 方式 2:通过 Git
|
| 68 |
+
|
| 69 |
+
```bash
|
| 70 |
+
# 克隆你的 Space 仓库
|
| 71 |
+
git clone https://huggingface.co/spaces/YOUR_USERNAME/YOUR_SPACE_NAME
|
| 72 |
+
cd YOUR_SPACE_NAME
|
| 73 |
+
|
| 74 |
+
# 添加你的代码
|
| 75 |
+
cp -r /path/to/depth-anything-3/* .
|
| 76 |
+
|
| 77 |
+
# 提交并推送
|
| 78 |
+
git add .
|
| 79 |
+
git commit -m "Initial commit"
|
| 80 |
+
git push
|
| 81 |
+
```
|
| 82 |
+
|
| 83 |
+
## 🔧 配置选项
|
| 84 |
+
|
| 85 |
+
### GPU 类型
|
| 86 |
+
|
| 87 |
+
Hugging Face Spaces 支持不同的 GPU 类型:
|
| 88 |
+
|
| 89 |
+
- **Free (T4)**: 免费,适合小型模型
|
| 90 |
+
- **A10G**: 付费,更强大
|
| 91 |
+
- **A100**: 付费,最强大
|
| 92 |
+
|
| 93 |
+
### GPU Duration
|
| 94 |
+
|
| 95 |
+
在 `app.py` 中可以调整:
|
| 96 |
+
|
| 97 |
+
```python
|
| 98 |
+
@spaces.GPU(duration=120) # 120 秒
|
| 99 |
+
```
|
| 100 |
+
|
| 101 |
+
- 设置太短:复杂推理可能超时
|
| 102 |
+
- 设置太长:浪费资源
|
| 103 |
+
- 推荐:根据实际推理时间设置(可以先设长一点,然后根据日志调整)
|
| 104 |
+
|
| 105 |
+
### 环境变量
|
| 106 |
+
|
| 107 |
+
可以在 Space 设置中配置环境变量:
|
| 108 |
+
|
| 109 |
+
- `DA3_MODEL_DIR`: 模型目录路径
|
| 110 |
+
- `DA3_WORKSPACE_DIR`: 工作空间目录
|
| 111 |
+
- `DA3_GALLERY_DIR`: 图库目录
|
| 112 |
+
|
| 113 |
+
## 📊 监控和调试
|
| 114 |
+
|
| 115 |
+
### 查看日志
|
| 116 |
+
|
| 117 |
+
在 Spaces 界面点击 "Logs" 标签可以看到:
|
| 118 |
+
|
| 119 |
+
```
|
| 120 |
+
🚀 Launching Depth Anything 3 on Hugging Face Spaces...
|
| 121 |
+
📦 Model Directory: depth-anything/DA3NESTED-GIANT-LARGE
|
| 122 |
+
📁 Workspace Directory: workspace/gradio
|
| 123 |
+
🖼️ Gallery Directory: workspace/gallery
|
| 124 |
+
```
|
| 125 |
+
|
| 126 |
+
### GPU 使用情况
|
| 127 |
+
|
| 128 |
+
在装饰的函数内部,可以检查 GPU 状态:
|
| 129 |
+
|
| 130 |
+
```python
|
| 131 |
+
print(torch.cuda.is_available()) # True
|
| 132 |
+
print(torch.cuda.device_count()) # 1 (通常)
|
| 133 |
+
print(torch.cuda.get_device_name(0)) # 'Tesla T4' 或其他
|
| 134 |
+
```
|
| 135 |
+
|
| 136 |
+
## 🎓 示例代码
|
| 137 |
+
|
| 138 |
+
查看 `example_spaces_gpu.py` 了解 `@spaces.GPU` 装饰器的基本用法。
|
| 139 |
+
|
| 140 |
+
## ❓ 常见问题
|
| 141 |
+
|
| 142 |
+
### Q: 为什么使用 monkey-patching?
|
| 143 |
+
|
| 144 |
+
A: 这样可以在不修改核心代码的情况下添加 Spaces 支持。如果你想更优雅的方式,可以:
|
| 145 |
+
|
| 146 |
+
1. 直接在 `ModelInference.run_inference` 方法上添加装饰器
|
| 147 |
+
2. 创建一个继承自 `ModelInference` 的新类
|
| 148 |
+
|
| 149 |
+
### Q: 如何测试本地是否能运行?
|
| 150 |
+
|
| 151 |
+
A: 本地运行时,`spaces.GPU` 装饰器会被忽略(如果没有安装 spaces 包),或者会直接执行函数而不做特殊处理。
|
| 152 |
+
|
| 153 |
+
```bash
|
| 154 |
+
# 本地测试
|
| 155 |
+
python app.py
|
| 156 |
+
```
|
| 157 |
+
|
| 158 |
+
### Q: 可以装饰多个函数吗?
|
| 159 |
+
|
| 160 |
+
A: 可以!你可以给任何需要 GPU 的函数添加 `@spaces.GPU` 装饰器。
|
| 161 |
+
|
| 162 |
+
```python
|
| 163 |
+
@spaces.GPU(duration=60)
|
| 164 |
+
def function1():
|
| 165 |
+
pass
|
| 166 |
+
|
| 167 |
+
@spaces.GPU(duration=120)
|
| 168 |
+
def function2():
|
| 169 |
+
pass
|
| 170 |
+
```
|
| 171 |
+
|
| 172 |
+
### Q: 如何优化 GPU 使用?
|
| 173 |
+
|
| 174 |
+
A: 一些建议:
|
| 175 |
+
|
| 176 |
+
1. **只装饰必要的函数**:不要装饰整个 app,只装饰实际使用 GPU 的推理函数
|
| 177 |
+
2. **设置合适的 duration**:根据实际需求设置
|
| 178 |
+
3. **清理 GPU 内存**:在函数结束时调用 `torch.cuda.empty_cache()`
|
| 179 |
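A sketch combining points 1 and 3 (`model` and `batch` are placeholders): decorate only the inference call, and release cached GPU memory even when inference raises:

```python
import spaces
import torch

@spaces.GPU(duration=120)
def run_inference(model, batch):
    try:
        with torch.no_grad():  # inference only; skip autograd bookkeeping
            return model(batch)
    finally:
        torch.cuda.empty_cache()  # hand cached blocks back to the allocator
```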
+
4. **批处理**:如果可能,批量处理多个请求
|
| 180 |
+
|
| 181 |
+
## 🔗 相关资源
|
| 182 |
+
|
| 183 |
+
- [Hugging Face Spaces 文档](https://huggingface.co/docs/hub/spaces)
|
| 184 |
+
- [Spaces GPU 使用指南](https://huggingface.co/docs/hub/spaces-gpus)
|
| 185 |
+
- [Gradio 文档](https://gradio.app/docs)
|
| 186 |
+
|
| 187 |
+
## 📝 许可证
|
| 188 |
+
|
| 189 |
+
Apache-2.0
|
| 190 |
+
|
app.py ADDED
@@ -0,0 +1,73 @@
# Copyright (c) 2025 ByteDance Ltd. and/or its affiliates
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Hugging Face Spaces App for Depth Anything 3.

This app uses the @spaces.GPU decorator to dynamically allocate GPU resources
for model inference on Hugging Face Spaces.
"""

import os
import spaces
from depth_anything_3.app.gradio_app import DepthAnything3App
from depth_anything_3.app.modules.model_inference import ModelInference

# Monkey-patch the run_inference method to use the @spaces.GPU decorator.
# This allows dynamic GPU allocation on Hugging Face Spaces.
original_run_inference = ModelInference.run_inference

@spaces.GPU(duration=120)  # Request GPU for up to 120 seconds per inference
def gpu_run_inference(self, *args, **kwargs):
    """
    GPU-accelerated inference with Spaces decorator.

    This function wraps the original run_inference method with @spaces.GPU,
    which ensures the model is moved to GPU when needed on HF Spaces.
    """
    return original_run_inference(self, *args, **kwargs)

# Replace the original method with the GPU-decorated version
ModelInference.run_inference = gpu_run_inference

# Initialize and launch the app
if __name__ == "__main__":
    # Configure directories for Hugging Face Spaces
    model_dir = os.environ.get("DA3_MODEL_DIR", "depth-anything/DA3NESTED-GIANT-LARGE")
    workspace_dir = os.environ.get("DA3_WORKSPACE_DIR", "workspace/gradio")
    gallery_dir = os.environ.get("DA3_GALLERY_DIR", "workspace/gallery")

    # Create directories if they don't exist
    os.makedirs(workspace_dir, exist_ok=True)
    os.makedirs(gallery_dir, exist_ok=True)

    # Initialize the app
    app = DepthAnything3App(
        model_dir=model_dir,
        workspace_dir=workspace_dir,
        gallery_dir=gallery_dir
    )

    # Launch with Spaces-friendly settings
    print("🚀 Launching Depth Anything 3 on Hugging Face Spaces...")
    print(f"📦 Model Directory: {model_dir}")
    print(f"📁 Workspace Directory: {workspace_dir}")
    print(f"🖼️ Gallery Directory: {gallery_dir}")

    app.launch(
        host="0.0.0.0",  # Required for Spaces
        port=7860,       # Standard Gradio port
        share=False,     # Not needed on Spaces
        debug=False
    )
example_spaces_gpu.py ADDED
@@ -0,0 +1,52 @@
| 1 |
+
"""
|
| 2 |
+
Simple example demonstrating @spaces.GPU decorator usage.
|
| 3 |
+
|
| 4 |
+
This example shows how the @spaces.GPU decorator works:
|
| 5 |
+
- Variables created outside the decorated function stay on CPU initially
|
| 6 |
+
- When the decorated function is called, the process moves to GPU environment
|
| 7 |
+
- Inside the decorated function, tensors can access CUDA
|
| 8 |
+
"""
|
| 9 |
+
|
| 10 |
+
import gradio as gr
|
| 11 |
+
import spaces
|
| 12 |
+
import torch
|
| 13 |
+
|
| 14 |
+
# This tensor is created at module load time
|
| 15 |
+
# On HF Spaces, it will be on CPU until a @spaces.GPU function is called
|
| 16 |
+
zero = torch.Tensor([0])
|
| 17 |
+
|
| 18 |
+
# Try to move to cuda - will fail gracefully if no GPU available
|
| 19 |
+
try:
|
| 20 |
+
zero = zero.cuda()
|
| 21 |
+
print(f"Initial device: {zero.device}") # On Spaces: shows 'cpu' 🤔
|
| 22 |
+
except:
|
| 23 |
+
print(f"Initial device: {zero.device}") # cpu (no GPU available yet)
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
@spaces.GPU(duration=60) # Request GPU for up to 60 seconds
|
| 27 |
+
def greet(n):
|
| 28 |
+
"""
|
| 29 |
+
This function runs on GPU when called.
|
| 30 |
+
The @spaces.GPU decorator ensures GPU access.
|
| 31 |
+
"""
|
| 32 |
+
# Inside the decorated function, we have GPU access
|
| 33 |
+
print(f"Inside GPU function - device: {zero.device}") # On Spaces: shows 'cuda:0' 🤗
|
| 34 |
+
|
| 35 |
+
# Perform GPU computation
|
| 36 |
+
result = zero + n
|
| 37 |
+
|
| 38 |
+
return f"Hello {result.item()} Tensor! (computed on {zero.device})"
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
# Create Gradio interface
|
| 42 |
+
demo = gr.Interface(
|
| 43 |
+
fn=greet,
|
| 44 |
+
inputs=gr.Number(value=42, label="Enter a number"),
|
| 45 |
+
outputs=gr.Text(label="Result"),
|
| 46 |
+
title="Spaces GPU Example",
|
| 47 |
+
description="Demonstrates @spaces.GPU decorator usage"
|
| 48 |
+
)
|
| 49 |
+
|
| 50 |
+
if __name__ == "__main__":
|
| 51 |
+
demo.launch()
|
| 52 |
+
|
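Because `import spaces` fails outside Hugging Face Spaces, a common way to keep a demo like this runnable locally is a no-op fallback decorator. The shim below is an illustration, not part of this repo:

```python
# Hypothetical local-run shim: if `spaces` is unavailable, replace it with a
# stand-in whose GPU decorator does nothing, so the script runs on any machine.
try:
    import spaces
except ImportError:
    class spaces:  # mimics the module's call shape
        @staticmethod
        def GPU(*args, **kwargs):
            # Support both @spaces.GPU and @spaces.GPU(duration=...)
            if len(args) == 1 and callable(args[0]) and not kwargs:
                return args[0]
            return lambda fn: fn
```
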
packages.txt
ADDED
@@ -0,0 +1,3 @@
build-essential
git
pyproject.toml
ADDED
@@ -0,0 +1,90 @@
[build-system]
requires = ["hatchling>=1.25", "hatch-vcs>=0.4"]
build-backend = "hatchling.build"

[project]
name = "depth-anything-3"
version = "0.0.0"
description = "Depth Anything 3"
readme = "README.md"
requires-python = ">=3.11"
license = { text = "Apache-2.0" }
authors = [{ name = "Your Name" }]

dependencies = [
    "pre-commit",
    "trimesh",
    "torch>=2",
    "torchvision",
    "einops",
    "huggingface_hub",
    "imageio",
    "numpy<2",
    "opencv-python",
    "xformers",
    "open3d",
    "fastapi",
    "requests",
    "pillow",
    "omegaconf",
    "evo",
    "e3nn",
    "plyfile",
    "pillow_heif",
    "safetensors",
    "uvicorn",
    "moviepy==1.0.3",
    "typer>=0.9.0",
]

[project.optional-dependencies]
app = ["gradio>=5", "pillow>=9.0", "spaces"]  # requires python>=3.10
gs = ["gsplat @ git+https://github.com/nerfstudio-project/gsplat.git@0b4dddf04cb687367602c01196913cde6a743d70"]
all = ["depth-anything-3[app,gs]"]


[project.scripts]
da3 = "depth_anything_3.cli:app"

[project.urls]
Homepage = "https://github.com/ByteDance-Seed/Depth-Anything-3"

[tool.hatch.version]
source = "vcs"

[tool.hatch.build.targets.wheel]
packages = ["src/depth_anything_3"]

[tool.hatch.build.targets.sdist]
include = [
    "/README.md",
    "/pyproject.toml",
    "/src/depth_anything_3",
]

[tool.hatch.metadata]
allow-direct-references = true

[tool.mypy]
plugins = ["jaxtyping.mypy_plugin"]

[tool.black]
line-length = 99
target-version = ['py311']
include = '\.pyi?$'
exclude = '''
/(
  | \.git
)/
'''

[tool.isort]
profile = "black"
multi_line_output = 3
include_trailing_comma = true
known_third_party = ["bson", "cruise", "cv2", "dataloader", "diffusers", "omegaconf", "tensorflow", "torch", "torchvision", "transformers", "gsplat"]
known_first_party = ["common", "data", "models", "projects"]
sections = ["FUTURE", "STDLIB", "THIRDPARTY", "FIRSTPARTY", "LOCALFOLDER"]
skip_gitignore = true
line_length = 99
no_lines_before = "THIRDPARTY"
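Since `app` and `gs` are optional extras, it can be useful to verify after installation which of them actually resolved, especially when the gsplat build is flaky on Spaces. A small, repo-independent check:

```python
# Illustrative sanity check: report which optional dependencies are importable.
import importlib.util

for name in ("gradio", "spaces", "gsplat"):
    print(name, "ok" if importlib.util.find_spec(name) else "missing")
```
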
requirements-basic.txt
ADDED
@@ -0,0 +1,41 @@
# Basic requirements without gsplat (for faster build)
# Use this if gsplat build fails on HF Spaces
# To use: rename this to requirements.txt

# Core dependencies
torch>=2.0.0
torchvision
einops
huggingface_hub
numpy<2
opencv-python

# Gradio and Spaces
gradio>=5.0.0
spaces
pillow>=9.0

# 3D and visualization
trimesh
open3d
plyfile

# Image processing
imageio
pillow_heif
safetensors

# Video processing
moviepy==1.0.3

# Math and geometry
e3nn

# Utilities
requests
omegaconf
xformers

# NOTE: gsplat is NOT included in this version
# 3DGS features will be disabled
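When this file is used in place of requirements.txt, any code that touches the Gaussian-splatting path has to tolerate a missing `gsplat`. A minimal guard pattern (the `HAS_GSPLAT` flag and helper are illustrative, not identifiers from this repo):

```python
# Detect gsplat at import time so 3DGS features can be disabled gracefully.
try:
    import gsplat  # noqa: F401
    HAS_GSPLAT = True
except ImportError:
    HAS_GSPLAT = False


def require_gsplat() -> None:
    """Hypothetical helper: call at the start of any 3DGS code path."""
    if not HAS_GSPLAT:
        raise RuntimeError("gsplat is not installed; 3DGS features are disabled.")
```
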
requirements.txt
ADDED
@@ -0,0 +1,38 @@
# Core dependencies
torch>=2.0.0
torchvision
einops
huggingface_hub
numpy<2
opencv-python

# Gradio and Spaces
gradio>=5.0.0
spaces
pillow>=9.0

# 3D and visualization
trimesh
open3d
plyfile

# Image processing
imageio
pillow_heif
safetensors

# Video processing
moviepy==1.0.3

# Math and geometry
e3nn

# Utilities
requests
omegaconf
xformers

# 3D Gaussian Splatting
# Note: This requires CUDA during build. If build fails on Spaces, see alternative solutions.
gsplat @ git+https://github.com/nerfstudio-project/gsplat.git@0b4dddf04cb687367602c01196913cde6a743d70
runtime.txt
ADDED
@@ -0,0 +1,2 @@
python-3.11
src/depth_anything_3/__pycache__/api.cpython-311.pyc
ADDED
Binary file (17.9 kB)

src/depth_anything_3/__pycache__/cfg.cpython-311.pyc
ADDED
Binary file (6.98 kB)

src/depth_anything_3/__pycache__/cli.cpython-311.pyc
ADDED
Binary file (27.2 kB)

src/depth_anything_3/__pycache__/registry.cpython-311.pyc
ADDED
Binary file (1.71 kB)

src/depth_anything_3/__pycache__/specs.cpython-311.pyc
ADDED
Binary file (1.73 kB)
src/depth_anything_3/api.py
ADDED
@@ -0,0 +1,414 @@
# Copyright (c) 2025 ByteDance Ltd. and/or its affiliates
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Depth Anything 3 API module.

This module provides the main API for Depth Anything 3, including model loading,
inference, and export capabilities. It supports both single and nested model architectures.
"""

from __future__ import annotations

import time
from typing import Optional, Sequence

import numpy as np
import torch
import torch.nn as nn
from huggingface_hub import PyTorchModelHubMixin
from PIL import Image

from depth_anything_3.cfg import create_object, load_config
from depth_anything_3.registry import MODEL_REGISTRY
from depth_anything_3.specs import Prediction
from depth_anything_3.utils.export import export
from depth_anything_3.utils.geometry import affine_inverse
from depth_anything_3.utils.io.input_processor import InputProcessor
from depth_anything_3.utils.io.output_processor import OutputProcessor
from depth_anything_3.utils.logger import logger
from depth_anything_3.utils.pose_align import align_poses_umeyama

torch.backends.cudnn.benchmark = False
# logger.info("CUDNN Benchmark Disabled")

SAFETENSORS_NAME = "model.safetensors"
CONFIG_NAME = "config.json"


class DepthAnything3(nn.Module, PyTorchModelHubMixin):
    """
    Depth Anything 3 main API class.

    This class provides a high-level interface for depth estimation using Depth Anything 3.
    It supports both single and nested model architectures with metric scaling capabilities.

    Features:
    - Hugging Face Hub integration via PyTorchModelHubMixin
    - Support for multiple model presets (vitb, vitg, nested variants)
    - Automatic mixed precision inference
    - Export capabilities for various formats (GLB, PLY, NPZ, etc.)
    - Camera pose estimation and metric depth scaling

    Usage:
        # Load from Hugging Face Hub
        model = DepthAnything3.from_pretrained("huggingface/model-name")

        # Or create with specific preset
        model = DepthAnything3(preset="vitg")

        # Run inference
        prediction = model.inference(images, export_dir="output", export_format="glb")
    """

    _commit_hash: str | None = None  # Set by mixin when loading from Hub

    def __init__(self, model_name: str = "da3-large", **kwargs):
        """
        Initialize DepthAnything3 with specified preset.

        Args:
            model_name: The name of the model preset to use.
                Examples: 'da3-giant', 'da3-large', 'da3metric-large', 'da3nested-giant-large'.
            **kwargs: Additional keyword arguments (currently unused).
        """
        super().__init__()
        self.model_name = model_name

        # Build the underlying network
        self.config = load_config(MODEL_REGISTRY[self.model_name])
        self.model = create_object(self.config)
        self.model.eval()

        # Initialize processors
        self.input_processor = InputProcessor()
        self.output_processor = OutputProcessor()

        # Device management (set by user)
        self.device = None

    @torch.inference_mode()
    def forward(
        self,
        image: torch.Tensor,
        extrinsics: torch.Tensor | None = None,
        intrinsics: torch.Tensor | None = None,
        export_feat_layers: list[int] | None = None,
        infer_gs: bool = False,
    ) -> dict[str, torch.Tensor]:
        """
        Forward pass through the model.

        Args:
            image: Input batch with shape ``(B, N, 3, H, W)`` on the model device.
            extrinsics: Optional camera extrinsics with shape ``(B, N, 4, 4)``.
            intrinsics: Optional camera intrinsics with shape ``(B, N, 3, 3)``.
            export_feat_layers: Layer indices to return intermediate features for.
            infer_gs: Whether to also run the 3D Gaussian branch.

        Returns:
            Dictionary containing model predictions
        """
        # Determine optimal autocast dtype
        autocast_dtype = torch.bfloat16 if torch.cuda.is_bf16_supported() else torch.float16
        with torch.no_grad():
            with torch.autocast(device_type=image.device.type, dtype=autocast_dtype):
                return self.model(image, extrinsics, intrinsics, export_feat_layers, infer_gs)

    def inference(
        self,
        image: list[np.ndarray | Image.Image | str],
        extrinsics: np.ndarray | None = None,
        intrinsics: np.ndarray | None = None,
        align_to_input_ext_scale: bool = True,
        infer_gs: bool = False,
        render_exts: np.ndarray | None = None,
        render_ixts: np.ndarray | None = None,
        render_hw: tuple[int, int] | None = None,
        process_res: int = 504,
        process_res_method: str = "upper_bound_resize",
        export_dir: str | None = None,
        export_format: str = "mini_npz",
        export_feat_layers: Sequence[int] | None = None,
        # GLB export parameters
        conf_thresh_percentile: float = 40.0,
        num_max_points: int = 1_000_000,
        show_cameras: bool = True,
        # Feat_vis export parameters
        feat_vis_fps: int = 15,
        export_kwargs: Optional[dict] = None,
    ) -> Prediction:
        """
        Run inference on input images.

        Args:
            image: List of input images (numpy arrays, PIL Images, or file paths)
            extrinsics: Camera extrinsics (N, 4, 4)
            intrinsics: Camera intrinsics (N, 3, 3)
            align_to_input_ext_scale: Whether to align the prediction to the input pose scale
            infer_gs: Enable the 3D Gaussian branch (needed for `gs_ply`/`gs_video` exports)
            render_exts: Optional render extrinsics for Gaussian video export
            render_ixts: Optional render intrinsics for Gaussian video export
            render_hw: Optional render resolution for Gaussian video export
            process_res: Processing resolution
            process_res_method: Resize method for processing
            export_dir: Directory to export results
            export_format: Export format (mini_npz, npz, glb, ply, gs, gs_video)
            export_feat_layers: Layer indices to export intermediate features from
            conf_thresh_percentile: [GLB] Lower percentile for adaptive confidence threshold (default: 40.0)  # noqa: E501
            num_max_points: [GLB] Maximum number of points in the point cloud (default: 1,000,000)
            show_cameras: [GLB] Show camera wireframes in the exported scene (default: True)
            feat_vis_fps: [FEAT_VIS] Frame rate for output video (default: 15)
            export_kwargs: Additional arguments passed to export functions.

        Returns:
            Prediction object containing depth maps and camera parameters
        """
        if "gs" in export_format:
            assert infer_gs, "must set `infer_gs=True` to perform gs-related export."

        # Preprocess images
        imgs_cpu, extrinsics, intrinsics = self._preprocess_inputs(
            image, extrinsics, intrinsics, process_res, process_res_method
        )

        # Prepare tensors for model
        imgs, ex_t, in_t = self._prepare_model_inputs(imgs_cpu, extrinsics, intrinsics)

        # Normalize extrinsics
        ex_t_norm = self._normalize_extrinsics(ex_t.clone() if ex_t is not None else None)

        # Run model forward pass
        export_feat_layers = list(export_feat_layers) if export_feat_layers is not None else []

        raw_output = self._run_model_forward(imgs, ex_t_norm, in_t, export_feat_layers, infer_gs)

        # Convert raw output to prediction
        prediction = self._convert_to_prediction(raw_output)

        # Align prediction to input extrinsics
        prediction = self._align_to_input_extrinsics_intrinsics(
            extrinsics, intrinsics, prediction, align_to_input_ext_scale
        )

        # Add processed images for visualization
        prediction = self._add_processed_images(prediction, imgs_cpu)

        # Export if requested
        if export_dir is not None:
            # Copy to avoid mutating a caller-supplied dict
            export_kwargs = dict(export_kwargs) if export_kwargs else {}

            if "gs" in export_format:
                if infer_gs and "gs_video" not in export_format:
                    export_format = f"{export_format}-gs_video"
            if "gs_video" in export_format:
                if "gs_video" not in export_kwargs:
                    export_kwargs["gs_video"] = {}
                export_kwargs["gs_video"].update(
                    {
                        "extrinsics": render_exts,
                        "intrinsics": render_ixts,
                        "out_image_hw": render_hw,
                    }
                )
            # Add GLB export parameters
            if "glb" in export_format:
                if "glb" not in export_kwargs:
                    export_kwargs["glb"] = {}
                export_kwargs["glb"].update(
                    {
                        "conf_thresh_percentile": conf_thresh_percentile,
                        "num_max_points": num_max_points,
                        "show_cameras": show_cameras,
                    }
                )
            # Add Feat_vis export parameters
            if "feat_vis" in export_format:
                if "feat_vis" not in export_kwargs:
                    export_kwargs["feat_vis"] = {}
                export_kwargs["feat_vis"].update(
                    {
                        "fps": feat_vis_fps,
                    }
                )
            self._export_results(prediction, export_format, export_dir, **export_kwargs)

        return prediction

    def _preprocess_inputs(
        self,
        image: list[np.ndarray | Image.Image | str],
        extrinsics: np.ndarray | None = None,
        intrinsics: np.ndarray | None = None,
        process_res: int = 504,
        process_res_method: str = "upper_bound_resize",
    ) -> tuple[torch.Tensor, torch.Tensor | None, torch.Tensor | None]:
        """Preprocess input images using the input processor."""
        start_time = time.time()
        imgs_cpu, extrinsics, intrinsics = self.input_processor(
            image,
            extrinsics.copy() if extrinsics is not None else None,
            intrinsics.copy() if intrinsics is not None else None,
            process_res,
            process_res_method,
        )
        end_time = time.time()
        logger.info(
            f"Processed Images Done. Time: {end_time - start_time} seconds. "
            f"Shape: {imgs_cpu.shape}"
        )
        return imgs_cpu, extrinsics, intrinsics

    def _prepare_model_inputs(
        self,
        imgs_cpu: torch.Tensor,
        extrinsics: torch.Tensor | None,
        intrinsics: torch.Tensor | None,
    ) -> tuple[torch.Tensor, torch.Tensor | None, torch.Tensor | None]:
        """Prepare tensors for model input."""
        device = self._get_model_device()

        # Move images to model device
        imgs = imgs_cpu.to(device, non_blocking=True)[None].float()

        # Convert camera parameters to tensors
        ex_t = (
            extrinsics.to(device, non_blocking=True)[None].float()
            if extrinsics is not None
            else None
        )
        in_t = (
            intrinsics.to(device, non_blocking=True)[None].float()
            if intrinsics is not None
            else None
        )

        return imgs, ex_t, in_t

    def _normalize_extrinsics(self, ex_t: torch.Tensor | None) -> torch.Tensor | None:
        """Normalize extrinsics relative to the first camera and the median camera distance."""
        if ex_t is None:
            return None
        transform = affine_inverse(ex_t[:, :1])
        ex_t_norm = ex_t @ transform
        c2ws = affine_inverse(ex_t_norm)
        translations = c2ws[..., :3, 3]
        dists = translations.norm(dim=-1)
        median_dist = torch.median(dists)
        median_dist = torch.clamp(median_dist, min=1e-1)
        ex_t_norm[..., :3, 3] = ex_t_norm[..., :3, 3] / median_dist
        return ex_t_norm

    def _align_to_input_extrinsics_intrinsics(
        self,
        extrinsics: torch.Tensor,
        intrinsics: torch.Tensor,
        prediction: Prediction,
        align_to_input_ext_scale: bool = True,
        ransac_view_thresh: int = 10,
    ) -> Prediction:
        """Align the predicted depth and poses to the input extrinsics."""
        if extrinsics is None:
            return prediction
        prediction.intrinsics = intrinsics.numpy()
        _, _, scale, aligned_extrinsics = align_poses_umeyama(
            prediction.extrinsics,
            extrinsics.numpy(),
            ransac=len(extrinsics) >= ransac_view_thresh,
            return_aligned=True,
            random_state=42,
        )
        if align_to_input_ext_scale:
            prediction.extrinsics = extrinsics[..., :3, :].numpy()
            prediction.depth /= scale
        else:
            prediction.extrinsics = aligned_extrinsics
        return prediction

    def _run_model_forward(
        self,
        imgs: torch.Tensor,
        ex_t: torch.Tensor | None,
        in_t: torch.Tensor | None,
        export_feat_layers: Sequence[int] | None = None,
        infer_gs: bool = False,
    ) -> dict[str, torch.Tensor]:
        """Run model forward pass."""
        device = imgs.device
        need_sync = device.type == "cuda"
        if need_sync:
            torch.cuda.synchronize(device)
        start_time = time.time()
        feat_layers = list(export_feat_layers) if export_feat_layers is not None else None
        output = self.forward(imgs, ex_t, in_t, feat_layers, infer_gs)
        if need_sync:
            torch.cuda.synchronize(device)
        end_time = time.time()
        logger.info(f"Model Forward Pass Done. Time: {end_time - start_time} seconds")
        return output

    def _convert_to_prediction(self, raw_output: dict[str, torch.Tensor]) -> Prediction:
        """Convert raw model output to Prediction object."""
        start_time = time.time()
        output = self.output_processor(raw_output)
        end_time = time.time()
        logger.info(f"Conversion to Prediction Done. Time: {end_time - start_time} seconds")
        return output

    def _add_processed_images(self, prediction: Prediction, imgs_cpu: torch.Tensor) -> Prediction:
        """Add processed images to prediction for visualization."""
        # Convert from (N, 3, H, W) to (N, H, W, 3) and denormalize
        processed_imgs = imgs_cpu.permute(0, 2, 3, 1).cpu().numpy()  # (N, H, W, 3)

        # Denormalize from ImageNet normalization
        mean = np.array([0.485, 0.456, 0.406])
        std = np.array([0.229, 0.224, 0.225])
        processed_imgs = processed_imgs * std + mean
        processed_imgs = np.clip(processed_imgs, 0, 1)
        processed_imgs = (processed_imgs * 255).astype(np.uint8)

        prediction.processed_images = processed_imgs
        return prediction

    def _export_results(
        self, prediction: Prediction, export_format: str, export_dir: str, **kwargs
    ) -> None:
        """Export results to the specified format and directory."""
        start_time = time.time()
        export(prediction, export_format, export_dir, **kwargs)
        end_time = time.time()
        logger.info(f"Export Results Done. Time: {end_time - start_time} seconds")

    def _get_model_device(self) -> torch.device:
        """
        Get the device where the model is located.

        Returns:
            Device where the model parameters are located

        Raises:
            ValueError: If no tensors are found in the model
        """
        if self.device is not None:
            return self.device

        # Find device from parameters
        for param in self.parameters():
            self.device = param.device
            return param.device

        # Find device from buffers
        for buffer in self.buffers():
            self.device = buffer.device
            return buffer.device

        raise ValueError("No tensor found in model")
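Putting the pieces of this API together, here is a minimal end-to-end sketch based on the docstrings above (the Hub id matches app.py's `DA3_MODEL_DIR` default; the image paths are placeholders):

```python
import torch
from depth_anything_3.api import DepthAnything3

# Load weights from the Hub via PyTorchModelHubMixin
model = DepthAnything3.from_pretrained("depth-anything/DA3NESTED-GIANT-LARGE")
model = model.to("cuda" if torch.cuda.is_available() else "cpu")

# Multi-view inference with a GLB export (point cloud + camera wireframes)
prediction = model.inference(
    ["view1.jpg", "view2.jpg"],  # placeholder file paths
    export_dir="output",
    export_format="glb",
)
print(prediction.depth.shape, prediction.extrinsics.shape)
```
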
src/depth_anything_3/app/__pycache__/css_and_html.cpython-311.pyc
ADDED
Binary file (18.5 kB)

src/depth_anything_3/app/__pycache__/gradio_app.cpython-311.pyc
ADDED
Binary file (27.9 kB)
src/depth_anything_3/app/css_and_html.py
ADDED
@@ -0,0 +1,594 @@
# flake8: noqa: E501

# Copyright (c) 2025 ByteDance Ltd. and/or its affiliates
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
CSS and HTML content for the Depth Anything 3 Gradio application.
This module contains all the CSS styles and HTML content blocks
used in the Gradio interface.
"""

# CSS Styles for the Gradio interface
GRADIO_CSS = """
/* Add Font Awesome CDN with all styles including brands and colors */
@import url('https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.0/css/all.min.css');

/* Add custom styles for colored icons */
.fa-color-blue {
    color: #3b82f6;
}

.fa-color-purple {
    color: #8b5cf6;
}

.fa-color-cyan {
    color: #06b6d4;
}

.fa-color-green {
    color: #10b981;
}

.fa-color-yellow {
    color: #f59e0b;
}

.fa-color-red {
    color: #ef4444;
}

.link-btn {
    display: inline-flex;
    align-items: center;
    gap: 8px;
    text-decoration: none;
    padding: 12px 24px;
    border-radius: 50px;
    font-weight: 500;
    transition: all 0.3s ease;
}

/* Dark mode tech theme */
@media (prefers-color-scheme: dark) {
    html, body {
        background: #1e293b;
        color: #ffffff;
    }

    .gradio-container {
        background: #1e293b;
        color: #ffffff;
    }

    .link-btn {
        background: rgba(255, 255, 255, 0.2);
        color: white;
        backdrop-filter: blur(10px);
        border: 1px solid rgba(255, 255, 255, 0.3);
    }

    .link-btn:hover {
        background: rgba(255, 255, 255, 0.3);
        transform: translateY(-2px);
        box-shadow: 0 8px 25px rgba(0, 0, 0, 0.2);
    }

    .tech-bg {
        background: linear-gradient(135deg, #0f172a, #1e293b); /* Darker colors */
        position: relative;
        overflow: hidden;
    }

    .tech-bg::before {
        content: '';
        position: absolute;
        top: 0;
        left: 0;
        right: 0;
        bottom: 0;
        background:
            radial-gradient(circle at 20% 80%, rgba(59, 130, 246, 0.15) 0%, transparent 50%), /* Reduced opacity */
            radial-gradient(circle at 80% 20%, rgba(139, 92, 246, 0.15) 0%, transparent 50%), /* Reduced opacity */
            radial-gradient(circle at 40% 40%, rgba(18, 194, 233, 0.1) 0%, transparent 50%); /* Reduced opacity */
        animation: techPulse 8s ease-in-out infinite;
    }

    .gradio-container .panel,
    .gradio-container .block,
    .gradio-container .form {
        background: rgba(0, 0, 0, 0.3);
        border: 1px solid rgba(59, 130, 246, 0.2);
        border-radius: 10px;
    }

    .gradio-container * {
        color: #ffffff;
    }

    .gradio-container label {
        color: #e0e0e0;
    }

    .gradio-container .markdown {
        color: #e0e0e0;
    }
}

/* Light mode tech theme */
@media (prefers-color-scheme: light) {
    html, body {
        background: #ffffff;
        color: #1e293b;
    }

    .gradio-container {
        background: #ffffff;
        color: #1e293b;
    }

    .tech-bg {
        background: linear-gradient(135deg, #ffffff, #f1f5f9);
        position: relative;
        overflow: hidden;
    }

    .link-btn {
        background: rgba(59, 130, 246, 0.15);
        color: var(--body-text-color);
        border: 1px solid rgba(59, 130, 246, 0.3);
    }

    .link-btn:hover {
        background: rgba(59, 130, 246, 0.25);
        transform: translateY(-2px);
        box-shadow: 0 8px 25px rgba(59, 130, 246, 0.2);
    }

    .tech-bg::before {
        content: '';
        position: absolute;
        top: 0;
        left: 0;
        right: 0;
        bottom: 0;
        background:
            radial-gradient(circle at 20% 80%, rgba(59, 130, 246, 0.1) 0%, transparent 50%),
            radial-gradient(circle at 80% 20%, rgba(139, 92, 246, 0.1) 0%, transparent 50%),
            radial-gradient(circle at 40% 40%, rgba(18, 194, 233, 0.08) 0%, transparent 50%);
        animation: techPulse 8s ease-in-out infinite;
    }

    .gradio-container .panel,
    .gradio-container .block,
    .gradio-container .form {
        background: rgba(255, 255, 255, 0.8);
        border: 1px solid rgba(59, 130, 246, 0.3);
        border-radius: 10px;
        box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
    }

    .gradio-container * {
        color: #1e293b;
    }

    .gradio-container label {
        color: #334155;
    }

    .gradio-container .markdown {
        color: #334155;
    }
}

@keyframes techPulse {
    0%, 100% { opacity: 0.5; }
    50% { opacity: 0.8; }
}

/* Custom log with tech gradient */
.custom-log * {
    font-style: italic;
    font-size: 22px !important;
    background: linear-gradient(135deg, #3b82f6, #8b5cf6);
    background-size: 400% 400%;
    -webkit-background-clip: text;
    background-clip: text;
    font-weight: bold !important;
    color: transparent !important;
    text-align: center !important;
    animation: techGradient 3s ease infinite;
}

@keyframes techGradient {
    0% { background-position: 0% 50%; }
    50% { background-position: 100% 50%; }
    100% { background-position: 0% 50%; }
}

@keyframes metricPulse {
    0%, 100% { background-position: 0% 50%; }
    50% { background-position: 100% 50%; }
}

@keyframes pointcloudPulse {
    0%, 100% { background-position: 0% 50%; }
    50% { background-position: 100% 50%; }
}

@keyframes camerasPulse {
    0%, 100% { background-position: 0% 50%; }
    50% { background-position: 100% 50%; }
}

@keyframes gaussiansPulse {
    0%, 100% { background-position: 0% 50%; }
    50% { background-position: 100% 50%; }
}

/* Special colors for key terms - Global styles */
.metric-text {
    background: linear-gradient(45deg, #ff6b6b, #ff8e53, #ff6b6b);
    background-size: 200% 200%;
    -webkit-background-clip: text;
    background-clip: text;
    color: transparent !important;
    animation: metricPulse 2s ease-in-out infinite;
    font-weight: 700;
    text-shadow: 0 0 10px rgba(255, 107, 107, 0.5);
}

.pointcloud-text {
    background: linear-gradient(45deg, #4ecdc4, #44a08d, #4ecdc4);
    background-size: 200% 200%;
    -webkit-background-clip: text;
    background-clip: text;
    color: transparent !important;
    animation: pointcloudPulse 2.5s ease-in-out infinite;
    font-weight: 700;
    text-shadow: 0 0 10px rgba(78, 205, 196, 0.5);
}

.cameras-text {
    background: linear-gradient(45deg, #667eea, #764ba2, #667eea);
    background-size: 200% 200%;
    -webkit-background-clip: text;
    background-clip: text;
    color: transparent !important;
    animation: camerasPulse 3s ease-in-out infinite;
    font-weight: 700;
    text-shadow: 0 0 10px rgba(102, 126, 234, 0.5);
}

.gaussians-text {
    background: linear-gradient(45deg, #f093fb, #f5576c, #f093fb);
    background-size: 200% 200%;
    -webkit-background-clip: text;
    background-clip: text;
    color: transparent !important;
    animation: gaussiansPulse 2.2s ease-in-out infinite;
    font-weight: 700;
    text-shadow: 0 0 10px rgba(240, 147, 251, 0.5);
}

.example-log * {
    font-style: italic;
    font-size: 16px !important;
    background: linear-gradient(135deg, #3b82f6, #8b5cf6);
    -webkit-background-clip: text;
    background-clip: text;
    color: transparent !important;
}

#my_radio .wrap {
    display: flex;
    flex-wrap: nowrap;
    justify-content: center;
    align-items: center;
}

#my_radio .wrap label {
    display: flex;
    width: 50%;
    justify-content: center;
    align-items: center;
    margin: 0;
    padding: 10px 0;
    box-sizing: border-box;
}

/* Align navigation buttons with dropdown bottom */
.navigation-row {
    display: flex !important;
    align-items: flex-end !important;
    gap: 8px !important;
}

.navigation-row > div:nth-child(1),
.navigation-row > div:nth-child(3) {
    align-self: flex-end !important;
}

.navigation-row > div:nth-child(2) {
    flex: 1 !important;
}

/* Make thumbnails clickable with pointer cursor */
.clickable-thumbnail img {
    cursor: pointer !important;
}

.clickable-thumbnail:hover img {
    cursor: pointer !important;
    opacity: 0.8;
    transition: opacity 0.3s ease;
}

/* Make thumbnail containers narrower horizontally */
.clickable-thumbnail {
    padding: 5px 2px !important;
    margin: 0 2px !important;
}

.clickable-thumbnail .image-container {
    margin: 0 !important;
    padding: 0 !important;
}

.scene-info {
    text-align: center !important;
    padding: 5px 2px !important;
    margin: 0 !important;
}
"""


def get_header_html(logo_base64=None):
    """
    Generate the main header HTML with logo and title.

    Args:
        logo_base64 (str, optional): Base64 encoded logo image

    Returns:
        str: HTML string for the header
    """
    return """
    <div class="tech-bg" style="text-align: center; margin-bottom: 5px; padding: 40px 20px; border-radius: 15px; position: relative; overflow: hidden;">
        <div style="position: relative; z-index: 2;">
            <h1 style="margin: 0; font-size: 3.5em; font-weight: 700;
                       background: linear-gradient(135deg, #3b82f6, #8b5cf6);
                       background-size: 400% 400%;
                       -webkit-background-clip: text;
                       background-clip: text;
                       color: transparent;
                       animation: techGradient 3s ease infinite;
                       text-shadow: 0 0 30px rgba(59, 130, 246, 0.5);
                       letter-spacing: 2px;">
                Depth Anything 3
            </h1>
            <p style="margin: 15px 0 0 0; font-size: 2.16em; font-weight: 300;" class="header-subtitle">
                Recovering the Visual Space from Any Views
            </p>
            <div style="margin-top: 20px;">
                <!-- Revert buttons to original inline styles -->
                <a href="https://depth-anything-3.github.io" target="_blank" class="link-btn">
                    <i class="fas fa-globe" style="margin-right: 8px;"></i> Project Page
                </a>
                <a href="https://arxiv.org/abs/2406.09414" target="_blank" class="link-btn">
                    <i class="fas fa-file-pdf" style="margin-right: 8px;"></i> Paper
                </a>
                <a href="https://github.com/ByteDance-Seed/Depth-Anything-3" target="_blank" class="link-btn">
                    <i class="fab fa-github" style="margin-right: 8px;"></i> Code
                </a>
            </div>
        </div>
    </div>

    <style>
    /* Ensure tech-bg class is properly applied in dark mode */
    @media (prefers-color-scheme: dark) {
        .header-subtitle {
            color: #cbd5e1;
        }
        /* Increase priority to ensure background color is properly applied */
        .tech-bg {
            background: linear-gradient(135deg, #0f172a, #1e293b) !important;
        }
    }

    @media (prefers-color-scheme: light) {
        .header-subtitle {
            color: #475569;
        }
        /* Also add explicit background color for light mode */
        .tech-bg {
            background: linear-gradient(135deg, rgba(59, 130, 246, 0.1) 0%, rgba(139, 92, 246, 0.1) 100%) !important;
        }
    }
    </style>
    """


def get_description_html():
    """
    Generate the main description and getting started HTML.

    Returns:
        str: HTML string for the description
    """
    return """
    <div class="description-container" style="padding: 25px; border-radius: 15px; margin: 0 0 20px 0;">
        <h2 class="description-title" style="margin-top: 0; font-size: 1.6em; text-align: center;">
            <i class="fas fa-bullseye fa-color-red" style="margin-right: 8px;"></i> What This Demo Does
        </h2>
        <div class="description-content" style="padding: 20px; border-radius: 10px; margin: 15px 0; text-align: center;">
            <p class="description-main" style="line-height: 1.6; margin: 0; font-size: 1.45em;">
                <strong>Upload images or videos</strong> → <strong>Get <span class="metric-text">Metric</span> <span class="pointcloud-text">Point Clouds</span>, <span class="cameras-text">Cameras</span> and <span class="gaussians-text">Novel Views</span></strong> → <strong>Explore in 3D</strong>
            </p>
        </div>

        <div style="text-align: center; margin-top: 15px;">
            <p class="description-tip" style="font-style: italic; margin: 0;">
                <i class="fas fa-lightbulb fa-color-yellow" style="margin-right: 8px;"></i> <strong>Tip:</strong> Landscape-oriented images or videos are preferred for best 3D recovery.
            </p>
        </div>
    </div>

    <style>
    @media (prefers-color-scheme: dark) {
        .description-container {
            background: linear-gradient(135deg, rgba(59, 130, 246, 0.1) 0%, rgba(139, 92, 246, 0.1) 100%);
            border: 1px solid rgba(59, 130, 246, 0.2);
        }
        .description-title { color: #3b82f6; }
        .description-content { background: rgba(0, 0, 0, 0.3); }
        .description-main { color: #e0e0e0; }
        .description-text { color: #cbd5e1; }
        .description-tip { color: #cbd5e1; }
    }

    @media (prefers-color-scheme: light) {
        .description-container {
            background: linear-gradient(135deg, rgba(59, 130, 246, 0.05) 0%, rgba(139, 92, 246, 0.05) 100%);
            border: 1px solid rgba(59, 130, 246, 0.3);
        }
        .description-title { color: #3b82f6; }
        .description-content { background: transparent; }
        .description-main { color: #1e293b; }
        .description-text { color: #475569; }
        .description-tip { color: #475569; }
    }
    </style>
    """


def get_acknowledgements_html():
    """
    Generate the acknowledgements section HTML.

    Returns:
        str: HTML string for the acknowledgements
    """
    return """
    <div style="background: linear-gradient(135deg, rgba(59, 130, 246, 0.1) 0%, rgba(139, 92, 246, 0.1) 100%);
                padding: 25px; border-radius: 15px; margin: 20px 0; border: 1px solid rgba(59, 130, 246, 0.2);">
        <h3 style="color: #3b82f6; margin-top: 0; text-align: center; font-size: 1.4em;">
            <i class="fas fa-trophy fa-color-yellow" style="margin-right: 8px;"></i> Research Credits & Acknowledgments
        </h3>

        <div style="display: grid; grid-template-columns: 1fr 1fr; gap: 20px; margin: 15px 0;">
            <!-- Original Research Section (Left) -->
            <div style="text-align: center;">
                <h4 style="color: #8b5cf6; margin: 10px 0;"><i class="fas fa-flask fa-color-green" style="margin-right: 8px;"></i> Original Research</h4>
                <p style="color: #e0e0e0; margin: 5px 0;">
                    <a href="https://depth-anything-3.github.io" target="_blank"
                       style="color: #3b82f6; text-decoration: none; font-weight: 600;">
                        Depth Anything 3
                    </a>
                </p>
            </div>

            <!-- Previous Versions Section (Right) -->
            <div style="text-align: center;">
                <h4 style="color: #8b5cf6; margin: 10px 0;"><i class="fas fa-history fa-color-blue" style="margin-right: 8px;"></i> Previous Versions</h4>
                <div style="display: flex; flex-direction: row; gap: 15px; justify-content: center; align-items: center;">
                    <p style="color: #e0e0e0; margin: 0;">
                        <a href="https://huggingface.co/spaces/LiheYoung/Depth-Anything" target="_blank"
                           style="color: #3b82f6; text-decoration: none; font-weight: 600;">
                            Depth-Anything
                        </a>
                    </p>
                    <span style="color: #e0e0e0;">•</span>
                    <p style="color: #e0e0e0; margin: 0;">
                        <a href="https://huggingface.co/spaces/depth-anything/Depth-Anything-V2" target="_blank"
                           style="color: #3b82f6; text-decoration: none; font-weight: 600;">
                            Depth-Anything-V2
                        </a>
                    </p>
                </div>
            </div>
        </div>

        <!-- HF Demo Adapted from - Centered at the bottom of the whole block -->
        <div style="margin-top: 20px; padding-top: 15px; border-top: 1px solid rgba(59, 130, 246, 0.3); text-align: center;">
            <p style="color: #a0a0a0; font-size: 0.9em; margin: 0;">
                <i class="fas fa-code-branch fa-color-gray" style="margin-right: 5px;"></i> HF demo adapted from <a href="https://huggingface.co/spaces/facebook/map-anything" target="_blank" style="color: inherit; text-decoration: none;">Map Anything</a>
            </p>
        </div>
    </div>
    """


def get_gradio_theme():
    """
    Get the configured Gradio theme with adaptive tech colors.

    Returns:
        gr.themes.Base: Configured Gradio theme
    """
    import gradio as gr

    return gr.themes.Base(
        primary_hue=gr.themes.Color(
            c50="#eff6ff",
            c100="#dbeafe",
            c200="#bfdbfe",
            c300="#93c5fd",
            c400="#60a5fa",
            c500="#3b82f6",
            c600="#2563eb",
            c700="#1d4ed8",
            c800="#1e40af",
            c900="#1e3a8a",
            c950="#172554",
        ),
        secondary_hue=gr.themes.Color(
            c50="#f5f3ff",
            c100="#ede9fe",
            c200="#ddd6fe",
            c300="#c4b5fd",
            c400="#a78bfa",
            c500="#8b5cf6",
            c600="#7c3aed",
            c700="#6d28d9",
            c800="#5b21b6",
            c900="#4c1d95",
            c950="#2e1065",
        ),
        neutral_hue=gr.themes.Color(
            c50="#f8fafc",
            c100="#f1f5f9",
            c200="#e2e8f0",
            c300="#cbd5e1",
            c400="#94a3b8",
            c500="#64748b",
            c600="#475569",
            c700="#334155",
            c800="#1e293b",
            c900="#0f172a",
            c950="#020617",
        ),
    )


# Measure tab instructions HTML
MEASURE_INSTRUCTIONS_HTML = """
### Click points on the image to compute distance.
> <i class="fas fa-triangle-exclamation fa-color-red" style="margin-right: 5px;"></i> Metric scale estimation is difficult on aerial/drone images.
"""
src/depth_anything_3/app/gradio_app.py
ADDED
|
@@ -0,0 +1,747 @@
# Copyright (c) 2025 ByteDance Ltd. and/or its affiliates
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Refactored Gradio App for Depth Anything 3.

This is the main application file that orchestrates all components.
The original functionality has been split into modular components for better maintainability.
"""

import argparse
import os
from typing import Any, Dict, List
import gradio as gr

from depth_anything_3.app.css_and_html import GRADIO_CSS, get_gradio_theme
from depth_anything_3.app.modules.event_handlers import EventHandlers
from depth_anything_3.app.modules.ui_components import UIComponents

# Set environment variables
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"


class DepthAnything3App:
    """
    Main application class for Depth Anything 3 Gradio app.
    """

    def __init__(self, model_dir: str = None, workspace_dir: str = None, gallery_dir: str = None):
        """
        Initialize the application.

        Args:
            model_dir: Path to the model directory
            workspace_dir: Path to the workspace directory
            gallery_dir: Path to the gallery directory
        """
        self.model_dir = model_dir
        self.workspace_dir = workspace_dir
        self.gallery_dir = gallery_dir

        # Set environment variables for directories
        if self.model_dir:
            os.environ["DA3_MODEL_DIR"] = self.model_dir
        if self.workspace_dir:
            os.environ["DA3_WORKSPACE_DIR"] = self.workspace_dir
        if self.gallery_dir:
            os.environ["DA3_GALLERY_DIR"] = self.gallery_dir

        self.event_handlers = EventHandlers()
        self.ui_components = UIComponents()

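    # Usage sketch (illustrative, not part of the committed app): the constructor
    # only records paths and exports them via the DA3_* environment variables,
    # so an instance is cheap to build up front:
    #     app = DepthAnything3App(
    #         model_dir="depth-anything/DA3NESTED-GIANT-LARGE",
    #         workspace_dir="workspace/gradio",
    #         gallery_dir="workspace/gallery",
    #     )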
    def cache_examples(
        self,
        show_cam: bool = True,
        filter_black_bg: bool = False,
        filter_white_bg: bool = False,
        save_percentage: float = 20.0,
        num_max_points: int = 1000,
        cache_gs_tag: str = "",
        gs_trj_mode: str = "smooth",
        gs_video_quality: str = "low",
    ) -> None:
        """
        Pre-cache all example scenes at startup.

        Args:
            show_cam: Whether to show camera in visualization
            filter_black_bg: Whether to filter black background
            filter_white_bg: Whether to filter white background
            save_percentage: Filter percentage for point cloud
            num_max_points: Maximum number of points
            cache_gs_tag: Tag to match scene names for high-res+3DGS caching (e.g., "dl3dv")
            gs_trj_mode: Trajectory mode for 3DGS
            gs_video_quality: Video quality for 3DGS
        """
        from depth_anything_3.app.modules.utils import get_scene_info

        examples_dir = os.path.join(self.workspace_dir, "examples")
        if not os.path.exists(examples_dir):
            print(f"Examples directory not found: {examples_dir}")
            return

        scenes = get_scene_info(examples_dir)
        if not scenes:
            print("No example scenes found to cache.")
            return

        print(f"\n{'='*60}")
        print(f"Caching {len(scenes)} example scenes...")
        print(f"{'='*60}\n")

        for i, scene in enumerate(scenes, 1):
            scene_name = scene["name"]

            # Check if scene name matches the gs tag for high-res+3DGS caching
            use_high_res_gs = cache_gs_tag and cache_gs_tag.lower() in scene_name.lower()

            if use_high_res_gs:
                print(f"[{i}/{len(scenes)}] Caching scene: {scene_name} (HIGH-RES + 3DGS)")
                print(f"  - Number of images: {scene['num_images']}")
                print(f"  - Matched tag: '{cache_gs_tag}' - using high_res + 3DGS")
            else:
                print(f"[{i}/{len(scenes)}] Caching scene: {scene_name} (LOW-RES)")
                print(f"  - Number of images: {scene['num_images']}")

            try:
                # Load example scene
                _, target_dir, _, _, _, _, _, _, _ = self.event_handlers.load_example_scene(
                    scene_name
                )

                if target_dir and target_dir != "None":
                    # Run reconstruction with appropriate settings
                    print("  - Running reconstruction...")
                    result = self.event_handlers.gradio_demo(
                        target_dir=target_dir,
                        show_cam=show_cam,
                        filter_black_bg=filter_black_bg,
                        filter_white_bg=filter_white_bg,
                        process_res_method="high_res" if use_high_res_gs else "low_res",
                        selected_first_frame="",
                        save_percentage=save_percentage,
                        num_max_points=num_max_points,
                        infer_gs=use_high_res_gs,
                        gs_trj_mode=gs_trj_mode,
                        gs_video_quality=gs_video_quality,
                    )

                    # Check if successful
                    if result[0] is not None:  # reconstruction_output
                        print(f"  ✓ Scene '{scene_name}' cached successfully")
                    else:
                        print(f"  ✗ Scene '{scene_name}' caching failed: {result[1]}")
                else:
                    print(f"  ✗ Scene '{scene_name}' loading failed")

            except Exception as e:
                print(f"  ✗ Error caching scene '{scene_name}': {str(e)}")

            print()

        print("=" * 60)
        print("Example scene caching completed!")
        print("=" * 60 + "\n")

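    # Note: cache_examples mirrors the CLI flags documented in main()'s epilog;
    # e.g. `python gradio_app.py --cache-examples --cache-gs-tag dl3dv` caches
    # dl3dv-tagged scenes with high_res + 3DGS and everything else in low_res.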
    def create_app(self) -> gr.Blocks:
        """
        Create and configure the Gradio application.

        Returns:
            Configured Gradio Blocks interface
        """

        # Initialize theme
        def get_theme():
            return get_gradio_theme()

        with gr.Blocks(theme=get_theme(), css=GRADIO_CSS) as demo:
            # State variables for the tabbed interface
            is_example = gr.Textbox(label="is_example", visible=False, value="None")
            processed_data_state = gr.State(value=None)
            measure_points_state = gr.State(value=[])
            selected_first_frame_state = gr.State(value="")
            selected_image_index_state = gr.State(value=0)  # Track selected image index
            # current_view_index = gr.State(value=0)  # noqa: F841 Track current view index

            # Header and description
            self.ui_components.create_header_section()
            self.ui_components.create_description_section()

            target_dir_output = gr.Textbox(label="Target Dir", visible=False, value="None")

            # Main content area
            with gr.Row():
                with gr.Column(scale=2):
                    # Upload section
                    (
                        input_video,
                        s_time_interval,
                        input_images,
                        image_gallery,
                        select_first_frame_btn,
                    ) = self.ui_components.create_upload_section()

                with gr.Column(scale=4):
                    with gr.Column():
                        # gr.Markdown("**Metric 3D Reconstruction (Point Cloud and Camera Poses)**")
                        # Reconstruction control section (buttons) - moved below tabs

                        log_output = gr.Markdown(
                            "Please upload a video or images, then click Reconstruct.",
                            elem_classes=["custom-log"],
                        )

                    # Tabbed interface
                    with gr.Tabs():
                        with gr.Tab("Point Cloud & Cameras"):
                            reconstruction_output = (
                                self.ui_components.create_3d_viewer_section()
                            )

                        with gr.Tab("Metric Depth"):
                            (
                                prev_measure_btn,
                                measure_view_selector,
                                next_measure_btn,
                                measure_image,
                                measure_depth_image,
                                measure_text,
                            ) = self.ui_components.create_measure_section()

                        with gr.Tab("3DGS Rendered Novel Views"):
                            gs_video, gs_info = self.ui_components.create_nvs_video()

            # Inference control section (before inference)
            (process_res_method_dropdown, infer_gs) = (
                self.ui_components.create_inference_control_section()
            )

            # Display control section - includes 3DGS options, buttons, and Visualization Options  # noqa: E501
            (
                show_cam,
                filter_black_bg,
                filter_white_bg,
                save_percentage,
                num_max_points,
                gs_trj_mode,
                gs_video_quality,
                submit_btn,
                clear_btn,
            ) = self.ui_components.create_display_control_section()

            # bind visibility of gs_trj_mode to infer_gs
            infer_gs.change(
                fn=lambda checked: (
                    gr.update(visible=checked),
                    gr.update(visible=checked),
                    gr.update(visible=checked),
                    gr.update(visible=(not checked)),
                ),
                inputs=infer_gs,
                outputs=[gs_trj_mode, gs_video_quality, gs_video, gs_info],
            )

            # Example scenes section
            gr.Markdown("## Example Scenes")

            scenes = self.ui_components.create_example_scenes_section()
            scene_components = self.ui_components.create_example_scene_grid(scenes)

            # Set up event handlers
            self._setup_event_handlers(
                demo,
                is_example,
                processed_data_state,
                measure_points_state,
                target_dir_output,
                input_video,
                input_images,
                s_time_interval,
                image_gallery,
                reconstruction_output,
                log_output,
                show_cam,
                filter_black_bg,
                filter_white_bg,
                process_res_method_dropdown,
                save_percentage,
                submit_btn,
                clear_btn,
                num_max_points,
                infer_gs,
                select_first_frame_btn,
                selected_first_frame_state,
                selected_image_index_state,
                measure_view_selector,
                measure_image,
                measure_depth_image,
                measure_text,
                prev_measure_btn,
                next_measure_btn,
                scenes,
                scene_components,
                gs_video,
                gs_info,
                gs_trj_mode,
                gs_video_quality,
            )

            # Acknowledgements
            self.ui_components.create_acknowledgements_section()

        return demo

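    # All bindings below follow the standard Gradio pattern
    # `component.event(fn=..., inputs=[...], outputs=[...])`; chained `.then()`
    # calls run sequentially, which is how the Reconstruct button first clears
    # the viewer, then updates the log, then runs inference.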
    def _setup_event_handlers(
        self,
        demo: gr.Blocks,
        is_example: gr.Textbox,
        processed_data_state: gr.State,
        measure_points_state: gr.State,
        target_dir_output: gr.Textbox,
        input_video: gr.Video,
        input_images: gr.File,
        s_time_interval: gr.Slider,
        image_gallery: gr.Gallery,
        reconstruction_output: gr.Model3D,
        log_output: gr.Markdown,
        show_cam: gr.Checkbox,
        filter_black_bg: gr.Checkbox,
        filter_white_bg: gr.Checkbox,
        process_res_method_dropdown: gr.Dropdown,
        save_percentage: gr.Slider,
        submit_btn: gr.Button,
        clear_btn: gr.ClearButton,
        num_max_points: gr.Slider,
        infer_gs: gr.Checkbox,
        select_first_frame_btn: gr.Button,
        selected_first_frame_state: gr.State,
        selected_image_index_state: gr.State,
        measure_view_selector: gr.Dropdown,
        measure_image: gr.Image,
        measure_depth_image: gr.Image,
        measure_text: gr.Markdown,
        prev_measure_btn: gr.Button,
        next_measure_btn: gr.Button,
        scenes: List[Dict[str, Any]],
        scene_components: List[gr.Image],
        gs_video: gr.Video,
        gs_info: gr.Markdown,
        gs_trj_mode: gr.Dropdown,
        gs_video_quality: gr.Dropdown,
    ) -> None:
        """
        Set up all event handlers for the application.

        Args:
            demo: Gradio Blocks interface
            All other arguments: Gradio components to connect
        """
        # Configure clear button
        clear_btn.add(
            [
                input_video,
                input_images,
                reconstruction_output,
                log_output,
                target_dir_output,
                image_gallery,
                gs_video,
            ]
        )

        # Main reconstruction button
        submit_btn.click(
            fn=self.event_handlers.clear_fields, inputs=[], outputs=[reconstruction_output]
        ).then(fn=self.event_handlers.update_log, inputs=[], outputs=[log_output]).then(
            fn=self.event_handlers.gradio_demo,
            inputs=[
                target_dir_output,
                show_cam,
                filter_black_bg,
                filter_white_bg,
                process_res_method_dropdown,
                selected_first_frame_state,
                save_percentage,
                # pass num_max_points
                num_max_points,
                infer_gs,
                gs_trj_mode,
                gs_video_quality,
            ],
            outputs=[
                reconstruction_output,
                log_output,
                processed_data_state,
                measure_image,
                measure_depth_image,
                measure_text,
                measure_view_selector,
                gs_video,
                gs_video,  # gs_video visibility
                gs_info,  # gs_info visibility
            ],
        ).then(
            fn=lambda: "False",
            inputs=[],
            outputs=[is_example],  # set is_example to "False"
        )

        # Real-time visualization updates
        self._setup_visualization_handlers(
            show_cam,
            filter_black_bg,
            filter_white_bg,
            process_res_method_dropdown,
            target_dir_output,
            is_example,
            reconstruction_output,
            log_output,
        )

        # File upload handlers
        input_video.change(
            fn=self.event_handlers.handle_uploads,
            inputs=[input_video, input_images, s_time_interval],
            outputs=[reconstruction_output, target_dir_output, image_gallery, log_output],
        )
        input_images.change(
            fn=self.event_handlers.handle_uploads,
            inputs=[input_video, input_images, s_time_interval],
            outputs=[reconstruction_output, target_dir_output, image_gallery, log_output],
        )

        # Image gallery click handler (for selecting first frame)
        def handle_image_selection(evt: gr.SelectData):
            if evt is None or evt.index is None:
                return "No image selected", 0
            selected_index = evt.index
            return f"Selected image {selected_index} as potential first frame", selected_index

        image_gallery.select(
            fn=handle_image_selection,
            outputs=[log_output, selected_image_index_state],
        )

        # Select first frame handler
        select_first_frame_btn.click(
            fn=self.event_handlers.select_first_frame,
            inputs=[image_gallery, selected_image_index_state],
            outputs=[image_gallery, log_output, selected_first_frame_state],
        )

        # Navigation handlers
        self._setup_navigation_handlers(
            prev_measure_btn,
            next_measure_btn,
            measure_view_selector,
            measure_image,
            measure_depth_image,
            measure_points_state,
            processed_data_state,
        )

        # Measurement handler
        measure_image.select(
            fn=self.event_handlers.measure,
            inputs=[processed_data_state, measure_points_state, measure_view_selector],
            outputs=[measure_image, measure_depth_image, measure_points_state, measure_text],
        )

        # Example scene handlers
        self._setup_example_scene_handlers(
            scenes,
            scene_components,
            reconstruction_output,
            target_dir_output,
            image_gallery,
            log_output,
            is_example,
            processed_data_state,
            measure_view_selector,
            measure_image,
            measure_depth_image,
            gs_video,
            gs_info,
        )

    def _setup_visualization_handlers(
        self,
        show_cam: gr.Checkbox,
        filter_black_bg: gr.Checkbox,
        filter_white_bg: gr.Checkbox,
        process_res_method_dropdown: gr.Dropdown,
        target_dir_output: gr.Textbox,
        is_example: gr.Textbox,
        reconstruction_output: gr.Model3D,
        log_output: gr.Markdown,
    ) -> None:
        """Set up visualization update handlers."""
        # Common inputs for visualization updates
        viz_inputs = [
            target_dir_output,
            show_cam,
            is_example,
            filter_black_bg,
            filter_white_bg,
            process_res_method_dropdown,
        ]

        # Set up change handlers for all visualization controls
        for component in [show_cam, filter_black_bg, filter_white_bg]:
            component.change(
                fn=self.event_handlers.update_visualization,
                inputs=viz_inputs,
                outputs=[reconstruction_output, log_output],
            )

    def _setup_navigation_handlers(
        self,
        prev_measure_btn: gr.Button,
        next_measure_btn: gr.Button,
        measure_view_selector: gr.Dropdown,
        measure_image: gr.Image,
        measure_depth_image: gr.Image,
        measure_points_state: gr.State,
        processed_data_state: gr.State,
    ) -> None:
        """Set up navigation handlers for measure tab."""
        # Measure tab navigation
        prev_measure_btn.click(
            fn=lambda processed_data, current_selector: self.event_handlers.navigate_measure_view(
                processed_data, current_selector, -1
            ),
            inputs=[processed_data_state, measure_view_selector],
            outputs=[
                measure_view_selector,
                measure_image,
                measure_depth_image,
                measure_points_state,
            ],
        )

        next_measure_btn.click(
            fn=lambda processed_data, current_selector: self.event_handlers.navigate_measure_view(
                processed_data, current_selector, 1
            ),
            inputs=[processed_data_state, measure_view_selector],
            outputs=[
                measure_view_selector,
                measure_image,
                measure_depth_image,
                measure_points_state,
            ],
        )

        measure_view_selector.change(
            fn=lambda processed_data, selector_value: (
                self.event_handlers.update_measure_view(
                    processed_data, int(selector_value.split()[1]) - 1
                )
                if selector_value
                else (None, None, [])
            ),
            inputs=[processed_data_state, measure_view_selector],
            outputs=[measure_image, measure_depth_image, measure_points_state],
        )

    def _setup_example_scene_handlers(
        self,
        scenes: List[Dict[str, Any]],
        scene_components: List[gr.Image],
        reconstruction_output: gr.Model3D,
        target_dir_output: gr.Textbox,
        image_gallery: gr.Gallery,
        log_output: gr.Markdown,
        is_example: gr.Textbox,
        processed_data_state: gr.State,
        measure_view_selector: gr.Dropdown,
        measure_image: gr.Image,
        measure_depth_image: gr.Image,
        gs_video: gr.Video,
        gs_info: gr.Markdown,
    ) -> None:
        """Set up example scene handlers."""

        def load_and_update_measure(name):
            result = self.event_handlers.load_example_scene(name)
            # result = (reconstruction_output, target_dir, image_paths, log_message, processed_data, measure_view_selector, gs_video, gs_video_vis, gs_info_vis)  # noqa: E501

            # Update measure view if processed_data is available
            measure_img = None
            measure_depth = None
            if result[4] is not None:  # processed_data exists
                measure_img, measure_depth, _ = (
                    self.event_handlers.visualization_handler.update_measure_view(result[4], 0)
                )

            return result + ("True", measure_img, measure_depth)

        for i, scene in enumerate(scenes):
            if i < len(scene_components):
                scene_components[i].select(
                    fn=lambda name=scene["name"]: load_and_update_measure(name),
                    outputs=[
                        reconstruction_output,
                        target_dir_output,
                        image_gallery,
                        log_output,
                        processed_data_state,
                        measure_view_selector,
                        gs_video,
                        gs_video,  # gs_video_visibility
                        gs_info,  # gs_info_visibility
                        is_example,
                        measure_image,
                        measure_depth_image,
                    ],
                )

    def launch(self, host: str = "127.0.0.1", port: int = 7860, **kwargs) -> None:
        """
        Launch the application.

        Args:
            host: Host address to bind to
            port: Port number to bind to
            **kwargs: Additional arguments for demo.launch()
        """
        demo = self.create_app()
        demo.queue(max_size=20).launch(
            show_error=True, ssr_mode=False, server_name=host, server_port=port, **kwargs
        )


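# Sketch (illustrative): launching without the CLI wrapper below; extra kwargs
# such as share=True are forwarded to demo.launch().
#     DepthAnything3App(model_dir="depth-anything/DA3NESTED-GIANT-LARGE").launch(
#         host="0.0.0.0", port=7860, share=True
#     )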
def main():
    """Main function to run the application."""
    parser = argparse.ArgumentParser(
        description="Depth Anything 3 Gradio Application",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Basic usage
  python gradio_app.py --help
  python gradio_app.py --host 0.0.0.0 --port 8080
  python gradio_app.py --model-dir /path/to/model --workspace-dir /path/to/workspace

  # Cache examples at startup (all low-res)
  python gradio_app.py --cache-examples

  # Cache with selective high-res+3DGS for scenes matching tag
  python gradio_app.py --cache-examples --cache-gs-tag dl3dv
  # This will use high-res + 3DGS for scenes containing "dl3dv" in their name,
  # and low-res only for other scenes
        """,
    )

    # Server configuration
    parser.add_argument(
        "--host", default="127.0.0.1", help="Host address to bind to (default: 127.0.0.1)"
    )
    parser.add_argument(
        "--port", type=int, default=7860, help="Port number to bind to (default: 7860)"
    )

    # Directory configuration
    parser.add_argument(
        "--model-dir",
        default="depth-anything/DA3NESTED-GIANT-LARGE",
        help="Path to the model directory (default: depth-anything/DA3NESTED-GIANT-LARGE)",
    )
    parser.add_argument(
        "--workspace-dir",
        default="workspace/gradio",  # noqa: E501
        help="Path to the workspace directory (default: workspace/gradio)",  # noqa: E501
    )
    parser.add_argument(
        "--gallery-dir",
        default="workspace/gallery",
        help="Path to the gallery directory (default: workspace/gallery)",  # noqa: E501
    )

    # Additional Gradio options
    parser.add_argument("--share", action="store_true", help="Create a public link for the app")
    parser.add_argument("--debug", action="store_true", help="Enable debug mode")

    # Example caching options
    parser.add_argument(
        "--cache-examples",
        action="store_true",
        help="Pre-cache all example scenes at startup for faster loading",
    )
    parser.add_argument(
        "--cache-gs-tag",
        type=str,
        default="",
        help="Tag to match scene names for high-res+3DGS caching (e.g., 'dl3dv'). Scenes containing this tag will use high_res and infer_gs=True; others will use low_res only.",  # noqa: E501
    )

    args = parser.parse_args()

    # Create directories if they don't exist
    os.makedirs(args.workspace_dir, exist_ok=True)
    os.makedirs(args.gallery_dir, exist_ok=True)

    # Initialize and launch the application
    app = DepthAnything3App(
        model_dir=args.model_dir, workspace_dir=args.workspace_dir, gallery_dir=args.gallery_dir
    )

    # Prepare launch arguments
    launch_kwargs = {"share": args.share, "debug": args.debug}

    print("Starting Depth Anything 3 Gradio App...")
    print(f"Host: {args.host}")
    print(f"Port: {args.port}")
    print(f"Model Directory: {args.model_dir}")
    print(f"Workspace Directory: {args.workspace_dir}")
    print(f"Gallery Directory: {args.gallery_dir}")
    print(f"Share: {args.share}")
    print(f"Debug: {args.debug}")
    print(f"Cache Examples: {args.cache_examples}")
    if args.cache_examples:
        if args.cache_gs_tag:
            print(
                f"Cache GS Tag: '{args.cache_gs_tag}' (scenes matching this tag will use high-res + 3DGS)"  # noqa: E501
            )
        else:
            print("Cache GS Tag: None (all scenes will use low-res only)")

    # Pre-cache examples if requested
    if args.cache_examples:
        print("\n" + "=" * 60)
        print("Pre-caching mode enabled")
        if args.cache_gs_tag:
            print(f"Scenes containing '{args.cache_gs_tag}' will use HIGH-RES + 3DGS")
            print("Other scenes will use LOW-RES only")
        else:
            print("All scenes will use LOW-RES only")
        print("=" * 60)
        app.cache_examples(
            show_cam=True,
            filter_black_bg=False,
            filter_white_bg=False,
            save_percentage=5.0,
            num_max_points=1000,
            cache_gs_tag=args.cache_gs_tag,
            gs_trj_mode="smooth",
            gs_video_quality="low",
        )

    app.launch(host=args.host, port=args.port, **launch_kwargs)


if __name__ == "__main__":
    main()
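For context, the `main()` entry point above is what a Hugging Face Spaces wrapper would typically call into. A minimal sketch (hypothetical; the Space's actual `app.py` may differ), binding to all interfaces as Spaces requires:

```python
# app_sketch.py - hypothetical Spaces entry point for the app defined above.
from depth_anything_3.app.gradio_app import DepthAnything3App

app = DepthAnything3App(
    model_dir="depth-anything/DA3NESTED-GIANT-LARGE",  # defaults from gradio_app.py
    workspace_dir="workspace/gradio",
    gallery_dir="workspace/gallery",
)
app.launch(host="0.0.0.0", port=7860)
```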
src/depth_anything_3/app/modules/__init__.py
ADDED
@@ -0,0 +1,45 @@
# Copyright (c) 2025 ByteDance Ltd. and/or its affiliates
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Modules package for Depth Anything 3 Gradio app.

This package contains all the modular components for the Gradio application.
"""

from depth_anything_3.app.modules.event_handlers import EventHandlers
from depth_anything_3.app.modules.file_handlers import FileHandler
from depth_anything_3.app.modules.model_inference import ModelInference
from depth_anything_3.app.modules.ui_components import UIComponents
from depth_anything_3.app.modules.utils import (
    cleanup_memory,
    create_depth_visualization,
    get_logo_base64,
    get_scene_info,
    save_to_gallery_func,
)
from depth_anything_3.app.modules.visualization import VisualizationHandler

__all__ = [
    "ModelInference",
    "FileHandler",
    "VisualizationHandler",
    "EventHandlers",
    "UIComponents",
    "create_depth_visualization",
    "save_to_gallery_func",
    "get_scene_info",
    "cleanup_memory",
    "get_logo_base64",
]
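Since `__init__.py` re-exports the module classes, callers can import them from the package root. A small usage sketch based on the re-exports above:

```python
# Equivalent shorthand enabled by the package-level re-exports.
from depth_anything_3.app.modules import EventHandlers, UIComponents, cleanup_memory

handlers = EventHandlers()  # wires up ModelInference, FileHandler, VisualizationHandler
ui = UIComponents()
cleanup_memory()  # free cached GPU/CPU memory between runs
```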
src/depth_anything_3/app/modules/__pycache__/__init__.cpython-311.pyc
ADDED
Binary file (1.22 kB).

src/depth_anything_3/app/modules/__pycache__/event_handlers.cpython-311.pyc
ADDED
Binary file (25.3 kB).

src/depth_anything_3/app/modules/__pycache__/file_handlers.cpython-311.pyc
ADDED
Binary file (13 kB).

src/depth_anything_3/app/modules/__pycache__/model_inference.cpython-311.pyc
ADDED
Binary file (12.3 kB).

src/depth_anything_3/app/modules/__pycache__/ui_components.cpython-311.pyc
ADDED
Binary file (19.3 kB).

src/depth_anything_3/app/modules/__pycache__/utils.cpython-311.pyc
ADDED
Binary file (9.22 kB).

src/depth_anything_3/app/modules/__pycache__/visualization.cpython-311.pyc
ADDED
Binary file (19 kB).
src/depth_anything_3/app/modules/event_handlers.py
ADDED
@@ -0,0 +1,629 @@
# Copyright (c) 2025 ByteDance Ltd. and/or its affiliates
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Event handling module for Depth Anything 3 Gradio app.

This module handles all event callbacks and user interactions.
"""

import os
import time
from glob import glob
from typing import Any, Dict, List, Optional, Tuple
import gradio as gr
import numpy as np
import torch

from depth_anything_3.app.modules.file_handlers import FileHandler
from depth_anything_3.app.modules.model_inference import ModelInference
from depth_anything_3.app.modules.utils import cleanup_memory
from depth_anything_3.app.modules.visualization import VisualizationHandler


class EventHandlers:
    """
    Handles all event callbacks and user interactions for the Gradio app.
    """

    def __init__(self):
        """Initialize the event handlers."""
        self.model_inference = ModelInference()
        self.file_handler = FileHandler()
        self.visualization_handler = VisualizationHandler()

    def clear_fields(self) -> None:
        """
        Clears the 3D viewer, the stored target_dir, and empties the gallery.
        """
        return None

    def update_log(self) -> str:
        """
        Display a quick log message while waiting.
        """
        return "Loading and Reconstructing..."

    def save_current_visualization(
        self,
        target_dir: str,
        save_percentage: float,
        show_cam: bool,
        filter_black_bg: bool,
        filter_white_bg: bool,
        processed_data: Optional[Dict],
        scene_name: str = "",
    ) -> str:
        """
        Save current visualization results to gallery with specified save percentage.

        Args:
            target_dir: Directory containing results
            save_percentage: Percentage of points to save (0-100)
            show_cam: Whether to show cameras
            filter_black_bg: Whether to filter black background
            filter_white_bg: Whether to filter white background
            processed_data: Processed data from reconstruction

        Returns:
            Status message
        """
        if not target_dir or target_dir == "None" or not os.path.isdir(target_dir):
            return "No reconstruction available. Please run 'Reconstruct' first."

        if processed_data is None:
            return "No processed data available. Please run 'Reconstruct' first."

        try:
            # Add debug information
            print("[DEBUG] save_current_visualization called with:")
            print(f"  target_dir: {target_dir}")
            print(f"  save_percentage: {save_percentage}")
            print(f"  show_cam: {show_cam}")
            print(f"  filter_black_bg: {filter_black_bg}")
            print(f"  filter_white_bg: {filter_white_bg}")
            print(f"  processed_data: {processed_data is not None}")

            # Import the gallery save function
            # Create gallery name with user input or auto-generated
            import datetime

            from .utils import save_to_gallery_func

            timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
            if scene_name and scene_name.strip():
                gallery_name = f"{scene_name.strip()}_{timestamp}_pct{save_percentage:.0f}"
            else:
                gallery_name = f"save_{timestamp}_pct{save_percentage:.0f}"

            print(f"[DEBUG] Saving to gallery with name: {gallery_name}")

            # Save entire process folder to gallery
            success, message = save_to_gallery_func(
                target_dir=target_dir, processed_data=processed_data, gallery_name=gallery_name
            )

            if success:
                print(f"[DEBUG] Gallery save completed successfully: {message}")
                return (
                    "Successfully saved to gallery!\n"
                    f"Gallery name: {gallery_name}\n"
                    f"Save percentage: {save_percentage}%\n"
                    f"Show cameras: {show_cam}\n"
                    f"Filter black bg: {filter_black_bg}\n"
                    f"Filter white bg: {filter_white_bg}\n\n"
                    f"{message}"
                )
            else:
                print(f"[DEBUG] Gallery save failed: {message}")
                return f"Failed to save to gallery: {message}"

        except Exception as e:
            return f"Error saving visualization: {str(e)}"

    def gradio_demo(
        self,
        target_dir: str,
        show_cam: bool = True,
        filter_black_bg: bool = False,
        filter_white_bg: bool = False,
        process_res_method: str = "upper_bound_resize",
        selected_first_frame: str = "",
        save_percentage: float = 30.0,
        num_max_points: int = 1_000_000,
        infer_gs: bool = False,
        gs_trj_mode: str = "extend",
        gs_video_quality: str = "high",
    ) -> Tuple[
        Optional[str],
        str,
        Optional[Dict],
        Optional[np.ndarray],
        Optional[np.ndarray],
        str,
        gr.Dropdown,
        Optional[str],  # gs video path
        gr.update,  # gs video visibility update
        gr.update,  # gs info visibility update
    ]:
        """
        Perform reconstruction using the already-created target_dir/images.

        Args:
            target_dir: Directory containing images
            show_cam: Whether to show camera
            filter_black_bg: Whether to filter black background
            filter_white_bg: Whether to filter white background
            process_res_method: Method for resizing input images
            selected_first_frame: Selected first frame filename
            infer_gs: Whether to infer 3D Gaussian Splatting

        Returns:
            Tuple of reconstruction results
        """
        if not os.path.isdir(target_dir) or target_dir == "None":
            return (
                None,
                "No valid target directory found. Please upload first.",
                None,
                None,
                None,
                "",
                None,
                None,
                gr.update(visible=False),  # gs_video
                gr.update(visible=True),  # gs_info
            )

        start_time = time.time()
        cleanup_memory()

        # Get image files for logging
        target_dir_images = os.path.join(target_dir, "images")
        all_files = (
            sorted(os.listdir(target_dir_images)) if os.path.isdir(target_dir_images) else []
        )

        print("Running DepthAnything3 model...")
        print(f"Selected first frame: {selected_first_frame}")

        # Validate selected_first_frame against current image list
        if selected_first_frame and target_dir_images:
            current_files = (
                sorted(os.listdir(target_dir_images)) if os.path.isdir(target_dir_images) else []
            )
            if selected_first_frame not in current_files:
                print(
                    f"Selected first frame '{selected_first_frame}' not found in "
                    "current images. Using default order."
                )
                selected_first_frame = ""  # Reset to use default order

        with torch.no_grad():
            prediction, processed_data = self.model_inference.run_inference(
                target_dir,
                process_res_method=process_res_method,
                show_camera=show_cam,
                selected_first_frame=selected_first_frame,
                save_percentage=save_percentage,
                num_max_points=int(num_max_points * 1000),  # Convert K to actual count
                infer_gs=infer_gs,
                gs_trj_mode=gs_trj_mode,
                gs_video_quality=gs_video_quality,
            )

        # The GLB file is already generated by the API
        glbfile = os.path.join(target_dir, "scene.glb")

        # Handle 3DGS video based on infer_gs flag
        gsvideo_path = None
        gs_video_visible = False
        gs_info_visible = True

        if infer_gs:
            try:
                gsvideo_path = sorted(glob(os.path.join(target_dir, "gs_video", "*.mp4")))[-1]
                gs_video_visible = True
                gs_info_visible = False
            except IndexError:
                gsvideo_path = None
                print("3DGS video not found, but infer_gs was enabled")

        # Cleanup
        cleanup_memory()

        end_time = time.time()
        print(f"Total time: {end_time - start_time:.2f} seconds")
        log_msg = f"Reconstruction Success ({len(all_files)} frames). Waiting for visualization."

        # Populate visualization tabs with processed data
        depth_vis, measure_img, measure_depth_vis, measure_pts = (
            self.visualization_handler.populate_visualization_tabs(processed_data)
        )

        # Update view selectors based on available views
        depth_selector, measure_selector = self.visualization_handler.update_view_selectors(
            processed_data
        )

        return (
            glbfile,
            log_msg,
            processed_data,
            measure_img,  # measure_image
            measure_depth_vis,  # measure_depth_image
            "",  # measure_text (empty initially)
            measure_selector,  # measure_view_selector
            gsvideo_path,
            gr.update(visible=gs_video_visible),  # gs_video visibility
            gr.update(visible=gs_info_visible),  # gs_info visibility
        )

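    # The 10-tuple returned by gradio_demo above maps positionally onto the
    # `outputs=[...]` list wired up in gradio_app.py (GLB viewer, log, state,
    # measure tab image/depth/text/selector, then the 3DGS video path and two
    # visibility updates), so the two lists must stay in sync on any change.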
    def update_visualization(
        self,
        target_dir: str,
        show_cam: bool,
        is_example: str,
        filter_black_bg: bool = False,
        filter_white_bg: bool = False,
        process_res_method: str = "upper_bound_resize",
    ) -> Tuple[gr.update, str]:
        """
        Reload saved predictions from npz, create (or reuse) the GLB for new parameters,
        and return it for the 3D viewer.

        Args:
            target_dir: Directory containing results
            show_cam: Whether to show camera
            is_example: Whether this is an example scene
            filter_black_bg: Whether to filter black background
            filter_white_bg: Whether to filter white background
            process_res_method: Method for resizing input images

        Returns:
            Tuple of (glb_file, log_message)
        """
        if not target_dir or target_dir == "None" or not os.path.isdir(target_dir):
            return (
                gr.update(),
                "No reconstruction available. Please click the Reconstruct button first.",
            )

        # Check if GLB exists (could be cached example or reconstructed scene)
        glbfile = os.path.join(target_dir, "scene.glb")
        if os.path.exists(glbfile):
            return (
                glbfile,
                (
                    "Visualization loaded from cache."
                    if is_example == "True"
                    else "Visualization updated."
                ),
            )

        # If no GLB but it's an example that hasn't been reconstructed yet
        if is_example == "True":
            return (
                gr.update(),
                "No reconstruction available. Please click the Reconstruct button first.",
            )

        # For non-examples, check predictions.npz
        predictions_path = os.path.join(target_dir, "predictions.npz")
        if not os.path.exists(predictions_path):
            error_message = (
                f"No reconstruction available at {predictions_path}. "
                "Please run 'Reconstruct' first."
            )
            return gr.update(), error_message

        loaded = np.load(predictions_path, allow_pickle=True)
        predictions = {key: loaded[key] for key in loaded.keys()}  # noqa: F841

        return (
            glbfile,
            "Visualization updated.",
        )

    def handle_uploads(
        self,
        input_video: Optional[str],
        input_images: Optional[List],
        s_time_interval: float = 10.0,
    ) -> Tuple[Optional[str], Optional[str], Optional[List], Optional[str]]:
        """
        Handle file uploads and update gallery.

        Args:
            input_video: Path to input video file
            input_images: List of input image files
            s_time_interval: Sampling FPS (frames per second) for frame extraction

        Returns:
            Tuple of (reconstruction_output, target_dir, image_paths, log_message)
        """
        return self.file_handler.update_gallery_on_upload(
            input_video, input_images, s_time_interval
        )

    def load_example_scene(self, scene_name: str, examples_dir: str = None) -> Tuple[
        Optional[str],
        Optional[str],
        Optional[List],
        str,
        Optional[Dict],
        gr.Dropdown,
        Optional[str],
        gr.update,
        gr.update,
    ]:
        """
        Load a scene from examples directory.

        Args:
            scene_name: Name of the scene to load
            examples_dir: Path to examples directory (if None, uses workspace_dir/examples)

        Returns:
            Tuple of (reconstruction_output, target_dir, image_paths, log_message, processed_data, measure_view_selector, gs_video, gs_video_vis, gs_info_vis)  # noqa: E501
        """
        if examples_dir is None:
            # Get workspace directory from environment variable
            workspace_dir = os.environ.get("DA3_WORKSPACE_DIR", "gradio_workspace")
            examples_dir = os.path.join(workspace_dir, "examples")

        reconstruction_output, target_dir, image_paths, log_message = (
            self.file_handler.load_example_scene(scene_name, examples_dir)
        )

        # Try to load cached processed data if available
        processed_data = None
        measure_view_selector = gr.Dropdown(choices=["View 1"], value="View 1")
        gs_video_path = None
        gs_video_visible = False
        gs_info_visible = True

        if target_dir and target_dir != "None":
            predictions_path = os.path.join(target_dir, "predictions.npz")
            if os.path.exists(predictions_path):
                try:
                    # Load predictions from cache
                    loaded = np.load(predictions_path, allow_pickle=True)
                    predictions = {key: loaded[key] for key in loaded.keys()}

                    # Reconstruct processed_data structure
                    num_images = len(predictions.get("images", []))
                    processed_data = {}

                    for i in range(num_images):
                        processed_data[i] = {
                            "image": predictions["images"][i] if "images" in predictions else None,
                            "depth": predictions["depths"][i] if "depths" in predictions else None,
                            "depth_image": os.path.join(
                                target_dir, "depth_vis", f"{i:04d}.jpg"  # Fixed: use .jpg not .png
                            ),
                            "intrinsics": (
                                predictions["intrinsics"][i]
                                if "intrinsics" in predictions
                                and i < len(predictions["intrinsics"])
                                else None
                            ),
                            "mask": None,
                        }

                    # Update measure view selector
                    choices = [f"View {i + 1}" for i in range(num_images)]
                    measure_view_selector = gr.Dropdown(choices=choices, value=choices[0])

                except Exception as e:
                    print(f"Error loading cached data: {e}")

            # Check for cached 3DGS video
            gs_video_dir = os.path.join(target_dir, "gs_video")
            if os.path.exists(gs_video_dir):
                try:
                    from glob import glob

                    gs_videos = sorted(glob(os.path.join(gs_video_dir, "*.mp4")))
                    if gs_videos:
                        gs_video_path = gs_videos[-1]
                        gs_video_visible = True
                        gs_info_visible = False
                        print(f"Loaded cached 3DGS video: {gs_video_path}")
                except Exception as e:
                    print(f"Error loading cached 3DGS video: {e}")

        return (
            reconstruction_output,
            target_dir,
            image_paths,
            log_message,
            processed_data,
            measure_view_selector,
            gs_video_path,
            gr.update(visible=gs_video_visible),
            gr.update(visible=gs_info_visible),
        )

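    # Cached-example layout assumed by load_example_scene above (paths are the
    # ones written at reconstruction time):
    #     <target_dir>/predictions.npz        - images/depths/intrinsics arrays
    #     <target_dir>/depth_vis/0000.jpg ... - per-view depth renderings
    #     <target_dir>/gs_video/*.mp4         - optional cached 3DGS flythroughs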
    def navigate_depth_view(
        self,
        processed_data: Optional[Dict[int, Dict[str, Any]]],
        current_selector: str,
        direction: int,
    ) -> Tuple[str, Optional[str]]:
        """
        Navigate depth view.

        Args:
            processed_data: Processed data dictionary
            current_selector: Current selector value
            direction: Direction to navigate

        Returns:
            Tuple of (new_selector_value, depth_vis)
        """
        return self.visualization_handler.navigate_depth_view(
            processed_data, current_selector, direction
        )

    def update_depth_view(
        self, processed_data: Optional[Dict[int, Dict[str, Any]]], view_index: int
    ) -> Optional[str]:
        """
        Update depth view for a specific view index.

        Args:
            processed_data: Processed data dictionary
            view_index: Index of the view to update

        Returns:
            Path to depth visualization image or None
        """
        return self.visualization_handler.update_depth_view(processed_data, view_index)

    def navigate_measure_view(
        self,
        processed_data: Optional[Dict[int, Dict[str, Any]]],
        current_selector: str,
        direction: int,
    ) -> Tuple[str, Optional[np.ndarray], Optional[np.ndarray], List]:
        """
        Navigate measure view.

        Args:
            processed_data: Processed data dictionary
            current_selector: Current selector value
            direction: Direction to navigate

        Returns:
            Tuple of (new_selector_value, measure_image, depth_right_half, measure_points)
        """
        return self.visualization_handler.navigate_measure_view(
            processed_data, current_selector, direction
        )

    def update_measure_view(
        self, processed_data: Optional[Dict[int, Dict[str, Any]]], view_index: int
    ) -> Tuple[Optional[np.ndarray], Optional[np.ndarray], List]:
        """
        Update measure view for a specific view index.

        Args:
            processed_data: Processed data dictionary
            view_index: Index of the view to update

        Returns:
            Tuple of (measure_image, depth_right_half, measure_points)
        """
        return self.visualization_handler.update_measure_view(processed_data, view_index)

    def measure(
        self,
        processed_data: Optional[Dict[int, Dict[str, Any]]],
        measure_points: List,
        current_view_selector: str,
        event: gr.SelectData,
    ) -> List:
        """
        Handle measurement on images.

        Args:
            processed_data: Processed data dictionary
            measure_points: List of current measure points
            current_view_selector: Current view selector value
            event: Gradio select event

        Returns:
            List of [image, depth_right_half, measure_points, text]
        """
        return self.visualization_handler.measure(
            processed_data, measure_points, current_view_selector, event
        )

    def select_first_frame(
        self, image_gallery: List, selected_index: int = 0
    ) -> Tuple[List, str, str]:
        """
        Select the first frame from the image gallery.

        Args:
            image_gallery: List of images in the gallery
            selected_index: Index of the selected image (default: 0)

        Returns:
            Tuple of (updated_image_gallery, log_message, selected_frame_path)
        """
        try:
            if not image_gallery or len(image_gallery) == 0:
                return image_gallery, "No images available to select as first frame.", ""

            # Handle None or invalid selected_index
            if (
                selected_index is None
                or selected_index < 0
                or selected_index >= len(image_gallery)
            ):
                # Log the invalid value before resetting it, so the message is useful
                print(f"Invalid selected_index: {selected_index}, using default: 0")
                selected_index = 0

            # Get the selected image based on index
            selected_image = image_gallery[selected_index]
            print(f"Selected image index: {selected_index}")
            print(f"Total images: {len(image_gallery)}")

            # Extract the file path from the selected image
            selected_frame_path = ""
            print(f"Selected image type: {type(selected_image)}")
            print(f"Selected image: {selected_image}")

            if isinstance(selected_image, tuple):
                # Gradio Gallery returns tuple (path, None)
                selected_frame_path = selected_image[0]
            elif isinstance(selected_image, str):
                selected_frame_path = selected_image
            elif hasattr(selected_image, "name"):
                selected_frame_path = selected_image.name
            elif isinstance(selected_image, dict):
                if "name" in selected_image:
                    selected_frame_path = selected_image["name"]
|
| 600 |
+
elif "path" in selected_image:
|
| 601 |
+
selected_frame_path = selected_image["path"]
|
| 602 |
+
elif "src" in selected_image:
|
| 603 |
+
selected_frame_path = selected_image["src"]
|
| 604 |
+
else:
|
| 605 |
+
# Try to convert to string
|
| 606 |
+
selected_frame_path = str(selected_image)
|
| 607 |
+
|
| 608 |
+
print(f"Extracted path: {selected_frame_path}")
|
| 609 |
+
|
| 610 |
+
# Extract filename from the path for matching
|
| 611 |
+
import os
|
| 612 |
+
|
| 613 |
+
selected_filename = os.path.basename(selected_frame_path)
|
| 614 |
+
print(f"Selected filename: {selected_filename}")
|
| 615 |
+
|
| 616 |
+
# Move the selected image to the front
|
| 617 |
+
updated_gallery = [selected_image] + [
|
| 618 |
+
img for img in image_gallery if img != selected_image
|
| 619 |
+
]
|
| 620 |
+
|
| 621 |
+
log_message = (
|
| 622 |
+
f"Selected frame: {selected_filename}. "
|
| 623 |
+
f"Moved to first position. Total frames: {len(updated_gallery)}"
|
| 624 |
+
)
|
| 625 |
+
return updated_gallery, log_message, selected_filename
|
| 626 |
+
|
| 627 |
+
except Exception as e:
|
| 628 |
+
print(f"Error selecting first frame: {e}")
|
| 629 |
+
return image_gallery, f"Error selecting first frame: {e}", ""
|
src/depth_anything_3/app/modules/file_handlers.py
ADDED
@@ -0,0 +1,304 @@
# Copyright (c) 2025 ByteDance Ltd. and/or its affiliates
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
File handling module for Depth Anything 3 Gradio app.

This module handles file uploads, video processing, and file operations.
"""

import os
import shutil
import time
from datetime import datetime
from typing import List, Optional, Tuple

import cv2
from PIL import Image
from pillow_heif import register_heif_opener

register_heif_opener()


class FileHandler:
    """
    Handles file uploads and processing for the Gradio app.
    """

    def __init__(self):
        """Initialize the file handler."""

    def handle_uploads(
        self,
        input_video: Optional[str],
        input_images: Optional[List],
        s_time_interval: float = 10.0,
    ) -> Tuple[str, List[str]]:
        """
        Create a new 'target_dir' + 'images' subfolder, and place user-uploaded
        images or extracted frames from video into it.

        Args:
            input_video: Path to input video file
            input_images: List of input image files
            s_time_interval: Sampling FPS (frames per second) for frame extraction

        Returns:
            Tuple of (target_dir, image_paths)
        """
        start_time = time.time()

        # Get workspace directory from environment variable or use default
        workspace_dir = os.environ.get("DA3_WORKSPACE_DIR", "gradio_workspace")
        if not os.path.exists(workspace_dir):
            os.makedirs(workspace_dir)

        # Create input_images subdirectory
        input_images_dir = os.path.join(workspace_dir, "input_images")
        if not os.path.exists(input_images_dir):
            os.makedirs(input_images_dir)

        # Create a unique folder name within input_images
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
        target_dir = os.path.join(input_images_dir, f"session_{timestamp}")
        target_dir_images = os.path.join(target_dir, "images")

        # Clean up if somehow that folder already exists
        if os.path.exists(target_dir):
            shutil.rmtree(target_dir)
        os.makedirs(target_dir)
        os.makedirs(target_dir_images)

        image_paths = []

        # Handle images
        if input_images is not None:
            image_paths.extend(self._process_images(input_images, target_dir_images))

        # Handle video
        if input_video is not None:
            image_paths.extend(
                self._process_video(input_video, target_dir_images, s_time_interval)
            )

        # Sort final images for gallery
        image_paths = sorted(image_paths)

        end_time = time.time()
        print(f"Files copied to {target_dir_images}; took {end_time - start_time:.3f} seconds")
        return target_dir, image_paths

    def _process_images(self, input_images: List, target_dir_images: str) -> List[str]:
        """
        Process uploaded images.

        Args:
            input_images: List of input image files
            target_dir_images: Target directory for images

        Returns:
            List of processed image paths
        """
        image_paths = []

        for file_data in input_images:
            if isinstance(file_data, dict) and "name" in file_data:
                file_path = file_data["name"]
            else:
                file_path = file_data

            # Check if the file is a HEIC image
            file_ext = os.path.splitext(file_path)[1].lower()
            if file_ext in [".heic", ".heif"]:
                # Convert HEIC to JPEG for better gallery compatibility
                try:
                    with Image.open(file_path) as img:
                        # Convert to RGB if necessary (HEIC can have different color modes)
                        if img.mode not in ("RGB", "L"):
                            img = img.convert("RGB")

                        # Create JPEG filename
                        base_name = os.path.splitext(os.path.basename(file_path))[0]
                        dst_path = os.path.join(target_dir_images, f"{base_name}.jpg")

                        # Save as JPEG with high quality
                        img.save(dst_path, "JPEG", quality=95)
                        image_paths.append(dst_path)
                        print(
                            f"Converted HEIC to JPEG: {os.path.basename(file_path)} -> "
                            f"{os.path.basename(dst_path)}"
                        )
                except Exception as e:
                    print(f"Error converting HEIC file {file_path}: {e}")
                    # Fall back to copying as is
                    dst_path = os.path.join(target_dir_images, os.path.basename(file_path))
                    shutil.copy(file_path, dst_path)
                    image_paths.append(dst_path)
            else:
                # Regular image files - copy as is
                dst_path = os.path.join(target_dir_images, os.path.basename(file_path))
                shutil.copy(file_path, dst_path)
                image_paths.append(dst_path)

        return image_paths

    def _process_video(
        self, input_video: str, target_dir_images: str, s_time_interval: float
    ) -> List[str]:
        """
        Process video file and extract frames.

        Args:
            input_video: Path to input video file
            target_dir_images: Target directory for extracted frames
            s_time_interval: Sampling FPS (frames per second) for frame extraction

        Returns:
            List of extracted frame paths
        """
        image_paths = []

        if isinstance(input_video, dict) and "name" in input_video:
            video_path = input_video["name"]
        else:
            video_path = input_video

        vs = cv2.VideoCapture(video_path)
        fps = vs.get(cv2.CAP_PROP_FPS)
        frame_interval = max(1, int(fps / s_time_interval))  # Convert FPS to frame interval

        count = 0
        video_frame_num = 0
        while True:
            gotit, frame = vs.read()
            if not gotit:
                break
            count += 1
            if count % frame_interval == 0:
                image_path = os.path.join(target_dir_images, f"{video_frame_num:06}.png")
                cv2.imwrite(image_path, frame)
                image_paths.append(image_path)
                video_frame_num += 1

        return image_paths

    def update_gallery_on_upload(
        self,
        input_video: Optional[str],
        input_images: Optional[List],
        s_time_interval: float = 10.0,
    ) -> Tuple[Optional[str], Optional[str], Optional[List], Optional[str]]:
        """
        Handle file uploads and update gallery.

        Args:
            input_video: Path to input video file
            input_images: List of input image files
            s_time_interval: Sampling FPS (frames per second) for frame extraction

        Returns:
            Tuple of (reconstruction_output, target_dir, image_paths, log_message)
        """
        if not input_video and not input_images:
            return None, None, None, None

        target_dir, image_paths = self.handle_uploads(input_video, input_images, s_time_interval)
        return (
            None,
            target_dir,
            image_paths,
            "Upload complete. Click 'Reconstruct' to begin 3D processing.",
        )

    def load_example_scene(
        self, scene_name: str, examples_dir: str = "examples"
    ) -> Tuple[Optional[str], Optional[str], Optional[List], str]:
        """
        Load a scene from examples directory.

        Args:
            scene_name: Name of the scene to load
            examples_dir: Path to examples directory

        Returns:
            Tuple of (reconstruction_output, target_dir, image_paths, log_message)
        """
        from depth_anything_3.app.modules.utils import get_scene_info

        scenes = get_scene_info(examples_dir)

        # Find the selected scene
        selected_scene = None
        for scene in scenes:
            if scene["name"] == scene_name:
                selected_scene = scene
                break

        if selected_scene is None:
            return None, None, None, "Scene not found"

        # Use fixed directory name for examples (not timestamp-based)
        workspace_dir = os.environ.get("DA3_WORKSPACE_DIR", "gradio_workspace")
        input_images_dir = os.path.join(workspace_dir, "input_images")
        if not os.path.exists(input_images_dir):
            os.makedirs(input_images_dir)

        # Create a fixed folder name based on scene name
        target_dir = os.path.join(input_images_dir, f"example_{scene_name}")
        target_dir_images = os.path.join(target_dir, "images")

        # Check if already cached (GLB file exists)
        glb_path = os.path.join(target_dir, "scene.glb")
        is_cached = os.path.exists(glb_path)

        # Create directory if it doesn't exist
        if not os.path.exists(target_dir):
            os.makedirs(target_dir)
            os.makedirs(target_dir_images)

        # Copy images if directory is new or empty
        if not os.path.exists(target_dir_images) or len(os.listdir(target_dir_images)) == 0:
            os.makedirs(target_dir_images, exist_ok=True)
            image_paths = []
            for file_path in selected_scene["image_files"]:
                dst_path = os.path.join(target_dir_images, os.path.basename(file_path))
                shutil.copy(file_path, dst_path)
                image_paths.append(dst_path)
        else:
            # Use existing images
            image_paths = sorted(
                [
                    os.path.join(target_dir_images, f)
                    for f in os.listdir(target_dir_images)
                    if f.lower().endswith((".png", ".jpg", ".jpeg", ".bmp", ".tiff", ".tif"))
                ]
            )

        # Return cached GLB if available
        if is_cached:
            return (
                glb_path,  # Return cached reconstruction
                target_dir,  # Set target directory
                image_paths,  # Set gallery
                f"Loaded cached scene '{scene_name}' with {selected_scene['num_images']} images.",
            )
        else:
            return (
                None,  # No cached reconstruction
                target_dir,  # Set target directory
                image_paths,  # Set gallery
                (
                    f"Loaded scene '{scene_name}' with {selected_scene['num_images']} images. "
                    "Click 'Reconstruct' to begin 3D processing."
                ),
            )
src/depth_anything_3/app/modules/model_inference.py
ADDED
@@ -0,0 +1,286 @@
# Copyright (c) 2025 ByteDance Ltd. and/or its affiliates
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Model inference module for Depth Anything 3 Gradio app.

This module handles all model-related operations including inference,
data processing, and result preparation.
"""

import gc
import glob
import os
from typing import Any, Dict, Optional, Tuple

import numpy as np
import torch

from depth_anything_3.api import DepthAnything3
from depth_anything_3.utils.export.glb import export_to_glb
from depth_anything_3.utils.export.gs import export_to_gs_video


class ModelInference:
    """
    Handles model inference and data processing for Depth Anything 3.
    """

    def __init__(self):
        """Initialize the model inference handler."""
        self.model = None

    def initialize_model(self, device: str = "cuda") -> None:
        """
        Initialize the DepthAnything3 model.

        Args:
            device: Device to load the model on
        """
        if self.model is None:
            # Get model directory from environment variable or use default
            model_dir = os.environ.get(
                "DA3_MODEL_DIR", "/dev/shm/da3_models/DA3HF-VITG-METRIC_VITL"
            )
            self.model = DepthAnything3.from_pretrained(model_dir)
            self.model = self.model.to(device)
        else:
            self.model = self.model.to(device)

        self.model.eval()

    def run_inference(
        self,
        target_dir: str,
        filter_black_bg: bool = False,
        filter_white_bg: bool = False,
        process_res_method: str = "upper_bound_resize",
        show_camera: bool = True,
        selected_first_frame: Optional[str] = None,
        save_percentage: float = 30.0,
        num_max_points: int = 1_000_000,
        infer_gs: bool = False,
        gs_trj_mode: str = "extend",
        gs_video_quality: str = "high",
    ) -> Tuple[Any, Dict[int, Dict[str, Any]]]:
        """
        Run DepthAnything3 model inference on images.

        Args:
            target_dir: Directory containing images
            filter_black_bg: Whether to filter black background
            filter_white_bg: Whether to filter white background
            process_res_method: Method for resizing input images
            show_camera: Whether to show camera in 3D view
            selected_first_frame: Selected first frame filename
            save_percentage: Percentage of points to save (0-100)
            num_max_points: Maximum number of points to export to GLB
            infer_gs: Whether to infer 3D Gaussian Splatting
            gs_trj_mode: Trajectory mode for 3DGS rendering
            gs_video_quality: Video quality for the 3DGS rendered output

        Returns:
            Tuple of (prediction, processed_data)
        """
        print(f"Processing images from {target_dir}")

        # Device check
        device = "cuda" if torch.cuda.is_available() else "cpu"
        device = torch.device(device)

        # Initialize model if needed
        self.initialize_model(device)

        # Get image paths
        print("Loading images...")
        image_folder_path = os.path.join(target_dir, "images")
        all_image_paths = sorted(glob.glob(os.path.join(image_folder_path, "*")))

        # Filter for image files
        image_extensions = [".jpg", ".jpeg", ".png", ".bmp", ".tiff", ".tif"]
        all_image_paths = [
            path
            for path in all_image_paths
            if any(path.lower().endswith(ext) for ext in image_extensions)
        ]

        print(f"Found {len(all_image_paths)} images")
        print(f"All image paths: {all_image_paths}")

        # Apply first frame selection logic
        if selected_first_frame:
            # Find the image with matching filename
            selected_path = None
            for path in all_image_paths:
                if os.path.basename(path) == selected_first_frame:
                    selected_path = path
                    break

            if selected_path:
                # Move selected frame to the front
                image_paths = [selected_path] + [
                    path for path in all_image_paths if path != selected_path
                ]
                print(f"User selected first frame: {selected_first_frame} -> {selected_path}")
                print(f"Reordered image paths: {image_paths}")
            else:
                # Use default order if no match found
                image_paths = all_image_paths
                print(
                    f"Selected frame '{selected_first_frame}' not found in image paths. "
                    "Using default order."
                )
                first_frame_display = image_paths[0] if image_paths else "No images"
                print(f"Using default order (first frame): {first_frame_display}")
        else:
            # Use default order (sorted)
            image_paths = all_image_paths
            first_frame_display = image_paths[0] if image_paths else "No images"
            print(f"Using default order (first frame): {first_frame_display}")

        if len(image_paths) == 0:
            raise ValueError("No images found. Check your upload.")

        # Map UI options to actual method names
        method_mapping = {"high_res": "lower_bound_resize", "low_res": "upper_bound_resize"}
        actual_method = method_mapping.get(process_res_method, "upper_bound_crop")

        # Run model inference
        print(f"Running inference with method: {actual_method}")
        with torch.no_grad():
            prediction = self.model.inference(
                image_paths, export_dir=None, process_res_method=actual_method, infer_gs=infer_gs
            )
        export_to_glb(
            prediction,
            filter_black_bg=filter_black_bg,
            filter_white_bg=filter_white_bg,
            export_dir=target_dir,
            show_cameras=show_camera,
            conf_thresh_percentile=save_percentage,
            num_max_points=int(num_max_points),
        )

        # Export to GS video if needed
        if infer_gs:
            mode_mapping = {"extend": "extend", "smooth": "interpolate_smooth"}
            backend_mode = mode_mapping.get(gs_trj_mode, "extend")
            print(f"GS mode: {gs_trj_mode}; Backend mode: {backend_mode}")
            export_to_gs_video(
                prediction,
                export_dir=target_dir,
                chunk_size=4,
                trj_mode=backend_mode,
                enable_tqdm=True,
                vis_depth="hcat",
                video_quality=gs_video_quality,
            )

        # Save predictions.npz for caching metric depth data
        self._save_predictions_cache(target_dir, prediction)

        # Process results
        processed_data = self._process_results(target_dir, prediction, image_paths)

        # Clean up
        torch.cuda.empty_cache()

        return prediction, processed_data

    def _save_predictions_cache(self, target_dir: str, prediction: Any) -> None:
        """
        Save predictions data to predictions.npz for caching.

        Args:
            target_dir: Directory to save the cache
            prediction: Model prediction object
        """
        try:
            output_file = os.path.join(target_dir, "predictions.npz")

            # Build save dict with prediction data
            save_dict = {}

            # Save processed images if available
            if prediction.processed_images is not None:
                save_dict["images"] = prediction.processed_images

            # Save depth data
            if prediction.depth is not None:
                save_dict["depths"] = np.round(prediction.depth, 6)

            # Save confidence if available
            if prediction.conf is not None:
                save_dict["conf"] = np.round(prediction.conf, 2)

            # Save camera parameters
            if prediction.extrinsics is not None:
                save_dict["extrinsics"] = prediction.extrinsics
            if prediction.intrinsics is not None:
                save_dict["intrinsics"] = prediction.intrinsics

            # Save to file
            np.savez_compressed(output_file, **save_dict)
            print(f"Saved predictions cache to: {output_file}")

        except Exception as e:
            print(f"Warning: Failed to save predictions cache: {e}")

    def _process_results(
        self, target_dir: str, prediction: Any, image_paths: list
    ) -> Dict[int, Dict[str, Any]]:
        """
        Process model results into structured data.

        Args:
            target_dir: Directory containing results
            prediction: Model prediction object
            image_paths: List of input image paths

        Returns:
            Dictionary containing processed data for each view
        """
        processed_data = {}

        # Read generated depth visualization files
        depth_vis_dir = os.path.join(target_dir, "depth_vis")

        if os.path.exists(depth_vis_dir):
            depth_files = sorted(glob.glob(os.path.join(depth_vis_dir, "*.jpg")))
            for i, depth_file in enumerate(depth_files):
                # Use processed images directly from API
                processed_image = None
                if prediction.processed_images is not None and i < len(
                    prediction.processed_images
                ):
                    processed_image = prediction.processed_images[i]

                processed_data[i] = {
                    "depth_image": depth_file,
                    "image": processed_image,
                    "original_image_path": image_paths[i] if i < len(image_paths) else None,
                    "depth": prediction.depth[i] if i < len(prediction.depth) else None,
                    "intrinsics": (
                        prediction.intrinsics[i]
                        if prediction.intrinsics is not None and i < len(prediction.intrinsics)
                        else None
                    ),
                    "mask": None,  # No mask information available
                }

        return processed_data

    def cleanup(self) -> None:
        """Clean up GPU memory."""
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
        gc.collect()
src/depth_anything_3/app/modules/ui_components.py
ADDED
@@ -0,0 +1,474 @@
# Copyright (c) 2025 ByteDance Ltd. and/or its affiliates
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
UI components module for Depth Anything 3 Gradio app.

This module contains UI component definitions and layout functions.
"""

import os
from typing import Any, Dict, List, Tuple

import gradio as gr

from depth_anything_3.app.modules.utils import get_logo_base64, get_scene_info


class UIComponents:
    """
    Handles UI component creation and layout for the Gradio app.
    """

    def __init__(self):
        """Initialize the UI components handler."""

    def create_upload_section(self) -> Tuple[gr.Video, gr.Slider, gr.File, gr.Gallery, gr.Button]:
        """
        Create the upload section with video, images, and gallery components.

        Returns:
            A tuple of Gradio components: (input_video, s_time_interval, input_images,
            image_gallery, select_first_frame_btn).
        """
        input_video = gr.Video(label="Upload Video", interactive=True)
        s_time_interval = gr.Slider(
            minimum=0.1,
            maximum=60,
            value=10,
            step=0.1,
            label="Sampling FPS (Frames Per Second)",
            interactive=True,
            visible=True,
        )
        input_images = gr.File(file_count="multiple", label="Upload Images", interactive=True)
        image_gallery = gr.Gallery(
            label="Preview",
            columns=4,
            height="300px",
            show_download_button=True,
            object_fit="contain",
            preview=True,
            interactive=False,
        )

        # Select first frame button (placed below the image gallery)
        select_first_frame_btn = gr.Button("Select First Frame", scale=1)

        return input_video, s_time_interval, input_images, image_gallery, select_first_frame_btn

    def create_3d_viewer_section(self) -> gr.Model3D:
        """
        Create the 3D viewer component.

        Returns:
            3D model viewer component
        """
        return gr.Model3D(
            height=520,
            zoom_speed=0.5,
            pan_speed=0.5,
            clear_color=[0.0, 0.0, 0.0, 0.0],
            key="persistent_3d_viewer",
            elem_id="reconstruction_3d_viewer",
        )

    def create_nvs_video(self) -> Tuple[gr.Video, gr.Markdown]:
        """
        Create the 3DGS rendered video display component and info message.

        Returns:
            Tuple of (video component, info message component)
        """
        with gr.Column():
            gs_info = gr.Markdown(
                (
                    "‼️ **3D Gaussian Splatting rendering is currently DISABLED.** <br><br><br>"
                    "To render novel views from 3DGS, "
                    "enable **Infer 3D Gaussian Splatting** below. <br>"
                    "Next, in **Visualization Options**, "
                    "*optionally* configure the **rendering trajectory** (default: smooth) "
                    "and **video quality** (default: low), "
                    "then click **Reconstruct**."
                ),
                visible=True,
                height=520,
            )
            gs_video = gr.Video(
                height=520,
                label="3DGS Rendered NVS Video (depth shown for reference only)",
                interactive=False,
                visible=False,
            )
        return gs_video, gs_info

    def create_depth_section(self) -> Tuple[gr.Button, gr.Dropdown, gr.Button, gr.Image]:
        """
        Create the depth visualization section.

        Returns:
            A tuple of (prev_depth_btn, depth_view_selector, next_depth_btn, depth_map)
        """
        with gr.Row(elem_classes=["navigation-row"]):
            prev_depth_btn = gr.Button("◀ Previous", size="sm", scale=1)
            depth_view_selector = gr.Dropdown(
                choices=["View 1"],
                value="View 1",
                label="Select View",
                scale=2,
                interactive=True,
                allow_custom_value=True,
            )
            next_depth_btn = gr.Button("Next ▶", size="sm", scale=1)
        depth_map = gr.Image(
            type="numpy",
            label="Colorized Depth Map",
            format="png",
            interactive=False,
        )

        return prev_depth_btn, depth_view_selector, next_depth_btn, depth_map

    def create_measure_section(
        self,
    ) -> Tuple[gr.Button, gr.Dropdown, gr.Button, gr.Image, gr.Image, gr.Markdown]:
        """
        Create the measurement section.

        Returns:
            A tuple of (prev_measure_btn, measure_view_selector, next_measure_btn, measure_image,
            measure_depth_image, measure_text)
        """
        from depth_anything_3.app.css_and_html import MEASURE_INSTRUCTIONS_HTML

        gr.Markdown(MEASURE_INSTRUCTIONS_HTML)
        with gr.Row(elem_classes=["navigation-row"]):
            prev_measure_btn = gr.Button("◀ Previous", size="sm", scale=1)
            measure_view_selector = gr.Dropdown(
                choices=["View 1"],
                value="View 1",
                label="Select View",
                scale=2,
                interactive=True,
                allow_custom_value=True,
            )
            next_measure_btn = gr.Button("Next ▶", size="sm", scale=1)
        with gr.Row():
            measure_image = gr.Image(
                type="numpy",
                show_label=False,
                format="webp",
                interactive=False,
                sources=[],
                label="RGB Image",
                scale=1,
                height=275,
            )
            measure_depth_image = gr.Image(
                type="numpy",
                show_label=False,
                format="webp",
                interactive=False,
                sources=[],
                label="Depth Visualization (Right Half)",
                scale=1,
                height=275,
            )
        gr.Markdown(
            "**Note:** Images have been adjusted to model processing size. "
            "Click two points on the RGB image to measure distance."
        )
        measure_text = gr.Markdown("")

        return (
            prev_measure_btn,
            measure_view_selector,
            next_measure_btn,
            measure_image,
            measure_depth_image,
            measure_text,
        )

    def create_inference_control_section(self) -> Tuple[gr.Dropdown, gr.Checkbox]:
        """
        Create the inference control section (before inference).

        Returns:
            Tuple of (process_res_method_dropdown, infer_gs)
        """
        with gr.Row():
            process_res_method_dropdown = gr.Dropdown(
                choices=["high_res", "low_res"],
                value="low_res",
                label="Image Processing Method",
                info="low_res supports many more images",
                scale=1,
            )
            infer_gs = gr.Checkbox(
                label="Infer 3D Gaussian Splatting",
                value=False,
                info=(
                    'Enable novel view rendering from 3DGS (<i class="fas fa-triangle-exclamation '
                    'fa-color-red"></i> requires extra processing time)'
                ),
                scale=1,
            )

        return (process_res_method_dropdown, infer_gs)

    def create_display_control_section(
        self,
    ) -> Tuple[
        gr.Checkbox,
        gr.Checkbox,
        gr.Checkbox,
        gr.Slider,
        gr.Slider,
        gr.Dropdown,
        gr.Dropdown,
        gr.Button,
        gr.ClearButton,
    ]:
        """
        Create the display control section (options for visualization).

        Returns:
            Tuple of display control components including buttons
        """
        with gr.Column():
            # 3DGS options at the top
            with gr.Row():
                gs_trj_mode = gr.Dropdown(
                    choices=["smooth", "extend"],
                    value="smooth",
                    label="Rendering trajectory for 3DGS viewpoints (requires n_views ≥ 2)",
                    info="'smooth' for view interpolation; 'extend' for longer trajectory",
                    visible=False,  # initially hidden
                )
                gs_video_quality = gr.Dropdown(
                    choices=["low", "medium", "high"],
                    value="low",
                    label="Video quality for 3DGS rendered outputs",
                    info="'low' for faster loading speed; 'high' for better visual quality",
                    visible=False,  # initially hidden
                )

            # Reconstruct and Clear buttons (before Visualization Options)
            with gr.Row():
                submit_btn = gr.Button("Reconstruct", scale=1, variant="primary")
                clear_btn = gr.ClearButton(scale=1)

            gr.Markdown("### Visualization Options: (Click Reconstruct to update)")
            show_cam = gr.Checkbox(label="Show Camera", value=True)
            filter_black_bg = gr.Checkbox(label="Filter Black Background", value=False)
            filter_white_bg = gr.Checkbox(label="Filter White Background", value=False)
            save_percentage = gr.Slider(
                minimum=0,
                maximum=100,
                value=10,
                step=1,
                label="Filter Percentage",
                info="Confidence Threshold (%): Higher values filter more points.",
            )
            num_max_points = gr.Slider(
                minimum=1000,
                maximum=100000,
                value=1000,
                step=1000,
                label="Max Points (K points)",
                info="Maximum number of points to export to GLB (in thousands)",
            )

        return (
            show_cam,
            filter_black_bg,
            filter_white_bg,
            save_percentage,
            num_max_points,
            gs_trj_mode,
            gs_video_quality,
            submit_btn,
            clear_btn,
        )

    def create_control_section(
        self,
    ) -> Tuple[
        gr.Button,
        gr.ClearButton,
        gr.Dropdown,
        gr.Checkbox,
        gr.Checkbox,
        gr.Checkbox,
        gr.Checkbox,
        gr.Checkbox,
        gr.Dropdown,
        gr.Checkbox,
        gr.Textbox,
    ]:
        """
        Create the control section with buttons and options.

        Returns:
            Tuple of control components
        """
        with gr.Row():
            submit_btn = gr.Button("Reconstruct", scale=1, variant="primary")
            clear_btn = gr.ClearButton(scale=1)

        with gr.Row():
            frame_filter = gr.Dropdown(
                choices=["All"], value="All", label="Show Points from Frame"
            )
            with gr.Column():
                gr.Markdown("### Visualization Option: (Click Reconstruct to update)")
                show_cam = gr.Checkbox(label="Show Camera", value=True)
                show_mesh = gr.Checkbox(label="Show Mesh", value=True)
                filter_black_bg = gr.Checkbox(label="Filter Black Background", value=False)
                filter_white_bg = gr.Checkbox(label="Filter White Background", value=False)
                gr.Markdown("### Reconstruction Options: (updated on next run)")
                apply_mask_checkbox = gr.Checkbox(
                    label="Apply mask for predicted ambiguous depth classes & edges",
                    value=True,
                )
                process_res_method_dropdown = gr.Dropdown(
                    choices=[
                        "upper_bound_resize",
                        "upper_bound_crop",
                        "lower_bound_resize",
                        "lower_bound_crop",
                    ],
                    value="upper_bound_resize",
                    label="Image Processing Method",
                    info="Method for resizing input images",
                )
                save_to_gallery_checkbox = gr.Checkbox(
                    label="Save to Gallery",
                    value=False,
                    info="Save current reconstruction results to gallery directory",
                )
                gallery_name_input = gr.Textbox(
                    label="Gallery Name",
                    placeholder="Enter a name for the gallery folder",
                    value="",
                    info="Leave empty for auto-generated name with timestamp",
                )

        return (
            submit_btn,
            clear_btn,
            frame_filter,
            show_cam,
            show_mesh,
            filter_black_bg,
            filter_white_bg,
            apply_mask_checkbox,
            process_res_method_dropdown,
            save_to_gallery_checkbox,
            gallery_name_input,
        )

    def create_example_scenes_section(self) -> List[Dict[str, Any]]:
        """
        Create the example scenes section.

        Returns:
            List of scene information dictionaries
        """
        # Get workspace directory from environment variable
        workspace_dir = os.environ.get("DA3_WORKSPACE_DIR", "gradio_workspace")
        examples_dir = os.path.join(workspace_dir, "examples")

        # Get scene information
        scenes = get_scene_info(examples_dir)

        return scenes

    def create_example_scene_grid(self, scenes: List[Dict[str, Any]]) -> List[gr.Image]:
        """
        Create the example scene grid.

        Args:
            scenes: List of scene information dictionaries

        Returns:
            List of scene image components
        """
        scene_components = []

        if scenes:
            for i in range(0, len(scenes), 4):  # Process 4 scenes per row
                with gr.Row():
                    for j in range(4):
                        scene_idx = i + j
                        if scene_idx < len(scenes):
                            scene = scenes[scene_idx]
                            with gr.Column(scale=1, elem_classes=["clickable-thumbnail"]):
                                # Clickable thumbnail
                                scene_img = gr.Image(
                                    value=scene["thumbnail"],
                                    height=150,
                                    interactive=False,
                                    show_label=False,
                                    elem_id=f"scene_thumb_{scene['name']}",
                                    sources=[],
                                )
                                scene_components.append(scene_img)

                                # Scene name and image count as text below thumbnail
                                gr.Markdown(
                                    f"**{scene['name']}** \n {scene['num_images']} images",
                                    elem_classes=["scene-info"],
                                )
                        else:
                            # Empty column to maintain grid structure
                            with gr.Column(scale=1):
                                pass

        return scene_components

    def create_header_section(self) -> gr.HTML:
        """
        Create the header section with logo and title.

        Returns:
            Header HTML component
        """
        from depth_anything_3.app.css_and_html import get_header_html

        return gr.HTML(get_header_html(get_logo_base64()))

    def create_description_section(self) -> gr.HTML:
        """
        Create the description section.

        Returns:
            Description HTML component
        """
        from depth_anything_3.app.css_and_html import get_description_html

        return gr.HTML(get_description_html())

    def create_acknowledgements_section(self) -> gr.HTML:
        """
        Create the acknowledgements section.

        Returns:
            Acknowledgements HTML component
        """
        from depth_anything_3.app.css_and_html import get_acknowledgements_html

        return gr.HTML(get_acknowledgements_html())
src/depth_anything_3/app/modules/utils.py
ADDED
@@ -0,0 +1,211 @@
# Copyright (c) 2025 ByteDance Ltd. and/or its affiliates
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Utility functions for Depth Anything 3 Gradio app.

This module contains helper functions for data processing, visualization,
and file operations.
"""

import gc
import json
import os
import shutil
from datetime import datetime
from typing import Any, Dict, List, Optional, Tuple

import numpy as np
import torch


def create_depth_visualization(depth: np.ndarray) -> Optional[np.ndarray]:
    """
    Create a colored depth visualization.

    Args:
        depth: Depth array

    Returns:
        Colored depth visualization or None
    """
    if depth is None:
        return None

    # Normalize depth to 0-1 range; zero-valued pixels are treated as invalid
    depth_min = depth[depth > 0].min() if (depth > 0).any() else 0
    depth_max = depth.max()

    if depth_max <= depth_min:
        return None

    # Normalize depth
    depth_norm = (depth - depth_min) / (depth_max - depth_min)
    depth_norm = np.clip(depth_norm, 0, 1)

    # Apply colormap (using matplotlib's viridis colormap)
    import matplotlib.cm as cm

    # Convert to colored image
    depth_colored = cm.viridis(depth_norm)[:, :, :3]  # Remove alpha channel
    depth_colored = (depth_colored * 255).astype(np.uint8)

    return depth_colored


def save_to_gallery_func(
    target_dir: str, processed_data: Dict[int, Dict[str, Any]], gallery_name: Optional[str] = None
) -> Tuple[bool, str]:
    """
    Save the current reconstruction results to the gallery directory.

    Args:
        target_dir: Source directory containing reconstruction results
        processed_data: Processed data dictionary
        gallery_name: Name for the gallery folder

    Returns:
        Tuple of (success, message)
    """
    try:
        # Get gallery directory from environment variable or use default
        gallery_dir = os.environ.get(
            "DA3_GALLERY_DIR",
            "workspace/gallery",
        )
        if not os.path.exists(gallery_dir):
            os.makedirs(gallery_dir)

        # Use provided name or create a unique name
        if gallery_name is None or gallery_name.strip() == "":
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            gallery_name = f"reconstruction_{timestamp}"

        gallery_path = os.path.join(gallery_dir, gallery_name)

        # Check if directory already exists
        if os.path.exists(gallery_path):
            return False, f"Save failed: folder '{gallery_name}' already exists"

        # Create the gallery directory
        os.makedirs(gallery_path, exist_ok=True)

        # Copy GLB file
        glb_source = os.path.join(target_dir, "scene.glb")
        glb_dest = os.path.join(gallery_path, "scene.glb")
        if os.path.exists(glb_source):
            shutil.copy2(glb_source, glb_dest)

        # Copy depth visualization images
        depth_vis_dir = os.path.join(target_dir, "depth_vis")
        if os.path.exists(depth_vis_dir):
            gallery_depth_vis = os.path.join(gallery_path, "depth_vis")
            shutil.copytree(depth_vis_dir, gallery_depth_vis)

        # Copy original images
        images_source = os.path.join(target_dir, "images")
        if os.path.exists(images_source):
            gallery_images = os.path.join(gallery_path, "images")
            shutil.copytree(images_source, gallery_images)

        # Copy the scene preview (guarded so a missing preview does not abort the save)
        scene_preview_source = os.path.join(target_dir, "scene.jpg")
        scene_preview_dest = os.path.join(gallery_path, "scene.jpg")
        if os.path.exists(scene_preview_source):
            shutil.copy2(scene_preview_source, scene_preview_dest)

        # Save metadata
        metadata = {
            "timestamp": datetime.now().strftime("%Y%m%d_%H%M%S"),
            "num_images": len(processed_data) if processed_data else 0,
            "gallery_name": gallery_name,
        }

        with open(os.path.join(gallery_path, "metadata.json"), "w") as f:
            json.dump(metadata, f, indent=2)

        print(f"Saved reconstruction to gallery: {gallery_path}")
        return True, f"Save successful: saved to {gallery_path}"

    except Exception as e:
        print(f"Error saving to gallery: {e}")
        return False, f"Save failed: {str(e)}"


def get_scene_info(examples_dir: str) -> List[Dict[str, Any]]:
    """
    Get information about scenes in the examples directory.

    Args:
        examples_dir: Path to examples directory

    Returns:
        List of scene information dictionaries
    """
    import glob

    scenes = []
    if not os.path.exists(examples_dir):
        return scenes

    for scene_folder in sorted(os.listdir(examples_dir)):
        scene_path = os.path.join(examples_dir, scene_folder)
        if os.path.isdir(scene_path):
            # Find all image files in the scene folder
            image_extensions = ["*.jpg", "*.jpeg", "*.png", "*.bmp", "*.tiff", "*.tif"]
            image_files = []
            for ext in image_extensions:
                image_files.extend(glob.glob(os.path.join(scene_path, ext)))
                image_files.extend(glob.glob(os.path.join(scene_path, ext.upper())))

            if image_files:
                # Sort images and use the first one as the thumbnail
                image_files = sorted(image_files)
                first_image = image_files[0]
                num_images = len(image_files)

                scenes.append(
                    {
                        "name": scene_folder,
                        "path": scene_path,
                        "thumbnail": first_image,
                        "num_images": num_images,
                        "image_files": image_files,
                    }
                )

    return scenes


def cleanup_memory() -> None:
    """Clean up GPU memory and garbage collect."""
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()


def get_logo_base64() -> Optional[str]:
    """
    Convert the WAI logo to base64 for embedding in HTML.

    Returns:
        Base64-encoded logo string or None
    """
    import base64

    logo_path = "examples/WAI-Logo/wai_logo.png"
    try:
        with open(logo_path, "rb") as img_file:
            img_data = img_file.read()
            base64_str = base64.b64encode(img_data).decode()
        return f"data:image/png;base64,{base64_str}"
    except FileNotFoundError:
        return None
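As a quick sanity check, a minimal sketch of exercising create_depth_visualization on its own. The synthetic depth map here (size, value range, and the zero-valued border marking invalid pixels) is invented purely for illustration and is not part of the app:

import numpy as np
from depth_anything_3.app.modules.utils import create_depth_visualization

# Hypothetical depth map: zeros mark invalid pixels, which the helper
# skips when picking the normalization minimum.
depth = np.zeros((240, 320), dtype=np.float32)
depth[40:200, 40:280] = np.linspace(0.5, 4.0, 160 * 240, dtype=np.float32).reshape(160, 240)

vis = create_depth_visualization(depth)
if vis is not None:
    print(vis.shape, vis.dtype)  # (240, 320, 3) uint8, viridis-colored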
src/depth_anything_3/app/modules/visualization.py ADDED
@@ -0,0 +1,434 @@
# Copyright (c) 2025 ByteDance Ltd. and/or its affiliates
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Visualization module for Depth Anything 3 Gradio app.

This module handles visualization updates, navigation, and measurement functionality.
"""

import os
from typing import Any, Dict, List, Optional, Tuple

import cv2
import gradio as gr
import numpy as np


class VisualizationHandler:
    """
    Handles visualization updates and navigation for the Gradio app.
    """

    def __init__(self):
        """Initialize the visualization handler."""

    def update_view_selectors(
        self, processed_data: Optional[Dict[int, Dict[str, Any]]]
    ) -> Tuple[gr.Dropdown, gr.Dropdown]:
        """
        Update view selector dropdowns based on available views.

        Args:
            processed_data: Processed data dictionary

        Returns:
            Tuple of (depth_view_selector, measure_view_selector)
        """
        if processed_data is None or len(processed_data) == 0:
            choices = ["View 1"]
        else:
            num_views = len(processed_data)
            choices = [f"View {i + 1}" for i in range(num_views)]

        return (
            gr.Dropdown(choices=choices, value=choices[0]),  # depth_view_selector
            gr.Dropdown(choices=choices, value=choices[0]),  # measure_view_selector
        )

    def get_view_data_by_index(
        self, processed_data: Optional[Dict[int, Dict[str, Any]]], view_index: int
    ) -> Optional[Dict[str, Any]]:
        """
        Get view data by index, clamping out-of-range indices to 0.

        Args:
            processed_data: Processed data dictionary
            view_index: Index of the view to get

        Returns:
            View data dictionary or None
        """
        if processed_data is None or len(processed_data) == 0:
            return None

        view_keys = list(processed_data.keys())
        if view_index < 0 or view_index >= len(view_keys):
            view_index = 0

        return processed_data[view_keys[view_index]]

    def update_depth_view(
        self, processed_data: Optional[Dict[int, Dict[str, Any]]], view_index: int
    ) -> Optional[str]:
        """
        Update depth view for a specific view index.

        Args:
            processed_data: Processed data dictionary
            view_index: Index of the view to update

        Returns:
            Path to depth visualization image or None
        """
        view_data = self.get_view_data_by_index(processed_data, view_index)
        if view_data is None or view_data.get("depth_image") is None:
            return None

        # Return the depth visualization image directly
        return view_data["depth_image"]

    def navigate_depth_view(
        self,
        processed_data: Optional[Dict[int, Dict[str, Any]]],
        current_selector_value: str,
        direction: int,
    ) -> Tuple[str, Optional[str]]:
        """
        Navigate depth view (direction: -1 for previous, +1 for next).

        Args:
            processed_data: Processed data dictionary
            current_selector_value: Current selector value
            direction: Direction to navigate (-1 for previous, +1 for next)

        Returns:
            Tuple of (new_selector_value, depth_vis)
        """
        if processed_data is None or len(processed_data) == 0:
            return "View 1", None

        # Parse current view number
        try:
            current_view = int(current_selector_value.split()[1]) - 1
        except (IndexError, ValueError):
            current_view = 0

        num_views = len(processed_data)
        new_view = (current_view + direction) % num_views

        new_selector_value = f"View {new_view + 1}"
        depth_vis = self.update_depth_view(processed_data, new_view)

        return new_selector_value, depth_vis

    def update_measure_view(
        self, processed_data: Optional[Dict[int, Dict[str, Any]]], view_index: int
    ) -> Tuple[Optional[np.ndarray], Optional[np.ndarray], List]:
        """
        Update measure view for a specific view index.

        Args:
            processed_data: Processed data dictionary
            view_index: Index of the view to update

        Returns:
            Tuple of (measure_image, depth_right_half, measure_points)
        """
        view_data = self.get_view_data_by_index(processed_data, view_index)
        if view_data is None:
            return None, None, []  # image, depth_right_half, measure_points

        # Get the processed (resized) image
        if "image" in view_data and view_data["image"] is not None:
            image = view_data["image"].copy()
        else:
            return None, None, []

        # Ensure image is in uint8 format
        if image.dtype != np.uint8:
            if image.max() <= 1.0:
                image = (image * 255).astype(np.uint8)
            else:
                image = image.astype(np.uint8)

        # Extract the right half of the depth visualization (the pure depth part)
        depth_image_path = view_data.get("depth_image", None)
        depth_right_half = None

        if depth_image_path and os.path.exists(depth_image_path):
            try:
                # Load the combined depth visualization image; check for a failed
                # read before converting color spaces
                depth_combined = cv2.imread(depth_image_path)
                if depth_combined is not None:
                    depth_combined = cv2.cvtColor(depth_combined, cv2.COLOR_BGR2RGB)
                    height, width = depth_combined.shape[:2]
                    # Extract right half (depth visualization part)
                    depth_right_half = depth_combined[:, width // 2 :]
            except Exception as e:
                print(f"Error extracting depth right half: {e}")

        return image, depth_right_half, []

    def navigate_measure_view(
        self,
        processed_data: Optional[Dict[int, Dict[str, Any]]],
        current_selector_value: str,
        direction: int,
    ) -> Tuple[str, Optional[np.ndarray], Optional[np.ndarray], List]:
        """
        Navigate measure view (direction: -1 for previous, +1 for next).

        Args:
            processed_data: Processed data dictionary
            current_selector_value: Current selector value
            direction: Direction to navigate (-1 for previous, +1 for next)

        Returns:
            Tuple of (new_selector_value, measure_image, depth_right_half, measure_points)
        """
        if processed_data is None or len(processed_data) == 0:
            return "View 1", None, None, []

        # Parse current view number
        try:
            current_view = int(current_selector_value.split()[1]) - 1
        except (IndexError, ValueError):
            current_view = 0

        num_views = len(processed_data)
        new_view = (current_view + direction) % num_views

        new_selector_value = f"View {new_view + 1}"
        measure_image, depth_right_half, measure_points = self.update_measure_view(
            processed_data, new_view
        )

        return new_selector_value, measure_image, depth_right_half, measure_points

    def populate_visualization_tabs(
        self, processed_data: Optional[Dict[int, Dict[str, Any]]]
    ) -> Tuple[Optional[str], Optional[np.ndarray], Optional[np.ndarray], List]:
        """
        Populate the depth and measure tabs with processed data.

        Args:
            processed_data: Processed data dictionary

        Returns:
            Tuple of (depth_vis, measure_img, depth_right_half, measure_points)
        """
        if processed_data is None or len(processed_data) == 0:
            return None, None, None, []

        # Use the update functions to fetch the first view's visualizations
        depth_vis = self.update_depth_view(processed_data, 0)
        measure_img, depth_right_half, _ = self.update_measure_view(processed_data, 0)

        return depth_vis, measure_img, depth_right_half, []

    def reset_measure(
        self, processed_data: Optional[Dict[int, Dict[str, Any]]]
    ) -> Tuple[Optional[np.ndarray], List, str]:
        """
        Reset measure points.

        Args:
            processed_data: Processed data dictionary

        Returns:
            Tuple of (image, measure_points, text)
        """
        if processed_data is None or len(processed_data) == 0:
            return None, [], ""

        # Return the first view image
        first_view = list(processed_data.values())[0]
        return first_view["image"], [], ""

    def measure(
        self,
        processed_data: Optional[Dict[int, Dict[str, Any]]],
        measure_points: List,
        current_view_selector: str,
        event: gr.SelectData,
    ) -> List:
        """
        Handle measurement clicks on images.

        Args:
            processed_data: Processed data dictionary
            measure_points: List of current measure points
            current_view_selector: Current view selector value
            event: Gradio select event

        Returns:
            List of [image, depth_right_half, measure_points, text]
        """
        try:
            print(f"Measure function called with selector: {current_view_selector}")

            if processed_data is None or len(processed_data) == 0:
                return [None, None, [], "No data available"]

            # Use the currently selected view instead of always using the first view
            try:
                current_view_index = int(current_view_selector.split()[1]) - 1
            except (IndexError, ValueError):
                current_view_index = 0

            print(f"Using view index: {current_view_index}")

            # Get view data safely
            if current_view_index < 0 or current_view_index >= len(processed_data):
                current_view_index = 0

            view_keys = list(processed_data.keys())
            current_view = processed_data[view_keys[current_view_index]]

            if current_view is None:
                return [None, None, [], "No view data available"]

            point2d = event.index[0], event.index[1]
            print(f"Clicked point: {point2d}")

            measure_points.append(point2d)

            # Get image and depth visualization
            image, depth_right_half, _ = self.update_measure_view(
                processed_data, current_view_index
            )
            if image is None:
                return [None, None, [], "No image available"]

            image = image.copy()

            # Ensure image is in uint8 format for proper cv2 operations
            try:
                if image.dtype != np.uint8:
                    if image.max() <= 1.0:
                        # Image is in [0, 1] range, convert to [0, 255]
                        image = (image * 255).astype(np.uint8)
                    else:
                        # Image is already in [0, 255] range
                        image = image.astype(np.uint8)
            except Exception as e:
                print(f"Image conversion error: {e}")
                return [None, None, [], f"Image conversion error: {e}"]

            # Draw circles for points
            try:
                for p in measure_points:
                    if 0 <= p[0] < image.shape[1] and 0 <= p[1] < image.shape[0]:
                        image = cv2.circle(image, p, radius=5, color=(255, 0, 0), thickness=2)
            except Exception as e:
                print(f"Drawing error: {e}")
                return [None, None, [], f"Drawing error: {e}"]

            # Get depth information from processed_data
            depth_text = ""
            try:
                for i, p in enumerate(measure_points):
                    if (
                        current_view["depth"] is not None
                        and 0 <= p[1] < current_view["depth"].shape[0]
                        and 0 <= p[0] < current_view["depth"].shape[1]
                    ):
                        d = current_view["depth"][p[1], p[0]]
                        depth_text += f"- **P{i + 1} depth: {d:.2f}m**\n"
                    else:
                        depth_text += f"- **P{i + 1}: Click position ({p[0]}, {p[1]}) - No depth information**\n"  # noqa: E501
            except Exception as e:
                print(f"Depth text error: {e}")
                depth_text = f"Error computing depth: {e}\n"

            if len(measure_points) == 2:
                try:
                    point1, point2 = measure_points
                    # Draw line
                    if (
                        0 <= point1[0] < image.shape[1]
                        and 0 <= point1[1] < image.shape[0]
                        and 0 <= point2[0] < image.shape[1]
                        and 0 <= point2[1] < image.shape[0]
                    ):
                        image = cv2.line(image, point1, point2, color=(255, 0, 0), thickness=2)

                    # Compute 3D distance using depth information and camera intrinsics
                    distance_text = "- **Distance: Unable to calculate 3D distance**"
                    if (
                        current_view["depth"] is not None
                        and 0 <= point1[1] < current_view["depth"].shape[0]
                        and 0 <= point1[0] < current_view["depth"].shape[1]
                        and 0 <= point2[1] < current_view["depth"].shape[0]
                        and 0 <= point2[0] < current_view["depth"].shape[1]
                    ):
                        try:
                            # Get depth values at the two points
                            d1 = current_view["depth"][point1[1], point1[0]]
                            d2 = current_view["depth"][point2[1], point2[0]]

                            # Convert 2D pixel coordinates to 3D camera coordinates
                            if current_view["intrinsics"] is not None:
                                # Get camera intrinsics
                                K = current_view["intrinsics"]  # 3x3 intrinsic matrix
                                fx, fy = K[0, 0], K[1, 1]  # focal lengths
                                cx, cy = K[0, 2], K[1, 2]  # principal point

                                # Back-project each pixel through the pinhole model
                                # Point 1: (u1, v1) -> (x1, y1, z1)
                                u1, v1 = point1[0], point1[1]
                                x1 = (u1 - cx) * d1 / fx
                                y1 = (v1 - cy) * d1 / fy
                                z1 = d1

                                # Point 2: (u2, v2) -> (x2, y2, z2)
                                u2, v2 = point2[0], point2[1]
                                x2 = (u2 - cx) * d2 / fx
                                y2 = (v2 - cy) * d2 / fy
                                z2 = d2

                                # Calculate 3D Euclidean distance
                                p1_3d = np.array([x1, y1, z1])
                                p2_3d = np.array([x2, y2, z2])
                                distance_3d = np.linalg.norm(p1_3d - p2_3d)

                                distance_text = f"- **Distance: {distance_3d:.2f}m**"
                            else:
                                # Fall back to a rough pixel-based estimate if no intrinsics
                                pixel_distance = np.sqrt(
                                    (point1[0] - point2[0]) ** 2 + (point1[1] - point2[1]) ** 2
                                )
                                avg_depth = (d1 + d2) / 2
                                scale_factor = avg_depth / 1000  # Rough scaling factor
                                estimated_3d_distance = pixel_distance * scale_factor
                                distance_text = f"- **Distance: {estimated_3d_distance:.2f}m (estimated, no intrinsics)**"  # noqa: E501

                        except Exception as e:
                            print(f"Distance computation error: {e}")
                            distance_text = f"- **Distance computation error: {e}**"

                    measure_points = []
                    text = depth_text + distance_text
                    print(f"Measurement complete: {text}")
                    return [image, depth_right_half, measure_points, text]
                except Exception as e:
                    print(f"Final measurement error: {e}")
                    return [None, None, [], f"Measurement error: {e}"]
            else:
                print(f"Single point measurement: {depth_text}")
                return [image, depth_right_half, measure_points, depth_text]

        except Exception as e:
            print(f"Overall measure function error: {e}")
            return [None, None, [], f"Measure function error: {e}"]
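The two-point measurement above uses standard pinhole back-projection: pixel (u, v) with depth d maps to camera coordinates ((u - cx) * d / fx, (v - cy) * d / fy, d), and the reported distance is the Euclidean norm between the two back-projected points. A self-contained sketch of that step; the intrinsics values and pixel coordinates below are made up for illustration, not outputs of the model:

import numpy as np

def backproject(u: int, v: int, d: float, K: np.ndarray) -> np.ndarray:
    """Map pixel (u, v) with depth d to 3D camera coordinates via the pinhole model."""
    fx, fy = K[0, 0], K[1, 1]  # focal lengths in pixels
    cx, cy = K[0, 2], K[1, 2]  # principal point
    return np.array([(u - cx) * d / fx, (v - cy) * d / fy, d])

# Illustrative intrinsics for a 640x480 image
K = np.array([[500.0, 0.0, 320.0],
              [0.0, 500.0, 240.0],
              [0.0, 0.0, 1.0]])

p1 = backproject(100, 120, 2.0, K)
p2 = backproject(400, 300, 2.5, K)
print(f"3D distance: {np.linalg.norm(p1 - p2):.2f}m")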
src/depth_anything_3/cfg.py ADDED
@@ -0,0 +1,144 @@
# Copyright (c) 2025 ByteDance Ltd. and/or its affiliates
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Configuration utility functions
"""

import importlib
from pathlib import Path
from typing import Any, Callable, List, Optional, Union

from omegaconf import DictConfig, ListConfig, OmegaConf

try:
    OmegaConf.register_new_resolver("eval", eval)
except Exception as e:
    # Registration can fail if a resolver named "eval" already exists; log and continue
    print(f"Error registering eval resolver: {e}")


def load_config(path: str, argv: Optional[List[str]] = None) -> Union[DictConfig, ListConfig]:
    """
    Load a configuration. Will resolve inheritance.
    Supports both file paths and module paths (e.g., depth_anything_3.configs.giant).
    """
    # Check if path is a module path (contains dots but no slashes and doesn't end with .yaml)
    if "." in path and "/" not in path and not path.endswith(".yaml"):
        # It's a module path, load from package resources
        path_parts = path.split(".")[1:]
        config_path = Path(__file__).resolve().parent
        for part in path_parts:
            config_path = config_path.joinpath(part)
        config_path = config_path.with_suffix(".yaml")
        config = OmegaConf.load(str(config_path))
    else:
        # It's a file path (absolute, relative, or with .yaml extension)
        config = OmegaConf.load(path)

    if argv is not None:
        config_argv = OmegaConf.from_dotlist(argv)
        config = OmegaConf.merge(config, config_argv)
    config = resolve_recursive(config, resolve_inheritance)
    return config


def resolve_recursive(
    config: Any,
    resolver: Callable[[Union[DictConfig, ListConfig]], Union[DictConfig, ListConfig]],
) -> Any:
    config = resolver(config)
    if isinstance(config, DictConfig):
        for k in config.keys():
            v = config.get(k)
            if isinstance(v, (DictConfig, ListConfig)):
                config[k] = resolve_recursive(v, resolver)
    if isinstance(config, ListConfig):
        for i in range(len(config)):
            v = config.get(i)
            if isinstance(v, (DictConfig, ListConfig)):
                config[i] = resolve_recursive(v, resolver)
    return config


def resolve_inheritance(config: Union[DictConfig, ListConfig]) -> Any:
    """
    Recursively resolve inheritance if the config contains:
    __inherit__: path/to/parent.yaml or a ListConfig of such paths.
    """
    if isinstance(config, DictConfig):
        inherit = config.pop("__inherit__", None)

        if inherit:
            inherit_list = inherit if isinstance(inherit, ListConfig) else [inherit]

            parent_config = None
            for parent_path in inherit_list:
                assert isinstance(parent_path, str)
                parent_config = (
                    load_config(parent_path)
                    if parent_config is None
                    else OmegaConf.merge(parent_config, load_config(parent_path))
                )

            if len(config.keys()) > 0:
                config = OmegaConf.merge(parent_config, config)
            else:
                config = parent_config
    return config


def import_item(path: str, name: str) -> Any:
    """
    Import a Python item. Example: import_item("path.to.file", "MyClass") -> MyClass
    """
    return getattr(importlib.import_module(path), name)


def create_object(config: DictConfig) -> Any:
    """
    Create an object from config.
    The config is expected to contain the following:
    __object__:
        path: path.to.module
        name: MyClass
        args: as_config | as_params (defaults to as_config)
    """
    config = DictConfig(config)
    item = import_item(
        path=config.__object__.path,
        name=config.__object__.name,
    )
    args = config.__object__.get("args", "as_config")
    if args == "as_config":
        return item(config)
    if args == "as_params":
        config = OmegaConf.to_object(config)
        config.pop("__object__")
        return item(**config)
    raise NotImplementedError(f"Unknown args type: {args}")


def create_dataset(path: str, *args, **kwargs) -> Any:
    """
    Create a dataset. Requires the file to contain a "create_dataset" function.
    """
    return import_item(path, "create_dataset")(*args, **kwargs)


def to_dict_recursive(config_obj):
    if isinstance(config_obj, DictConfig):
        return {k: to_dict_recursive(v) for k, v in config_obj.items()}
    elif isinstance(config_obj, ListConfig):
        return [to_dict_recursive(item) for item in config_obj]
    return config_obj
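To illustrate how __inherit__ resolution composes configs: a child merges its parent first, so the child's keys win. The sketch below is hypothetical; the file names and keys (base.yaml, child.yaml, model.backbone) are invented for the example and are not configs shipped in this repo:

from omegaconf import OmegaConf

from depth_anything_3.cfg import load_config

# base.yaml (hypothetical):
#   model:
#     backbone: vit_large
#     depth_head: dpt
#
# child.yaml (hypothetical):
#   __inherit__: base.yaml
#   model:
#     backbone: vit_giant

cfg = load_config("child.yaml")
print(OmegaConf.to_yaml(cfg))
# model:
#   backbone: vit_giant   <- child override
#   depth_head: dpt       <- inherited from base.yaml

# Dotlist overrides merge on top before inheritance is resolved:
cfg = load_config("child.yaml", argv=["model.backbone=vit_small"])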
src/depth_anything_3/cli.py ADDED
@@ -0,0 +1,742 @@
| 1 |
+
# flake8: noqa: E402
|
| 2 |
+
# Copyright (c) 2025 ByteDance Ltd. and/or its affiliates
|
| 3 |
+
#
|
| 4 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 5 |
+
# you may not use this file except in compliance with the License.
|
| 6 |
+
# You may obtain a copy of the License at
|
| 7 |
+
#
|
| 8 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 9 |
+
#
|
| 10 |
+
# Unless required by applicable law or agreed to in writing, software
|
| 11 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
| 12 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 13 |
+
# See the License for the specific language governing permissions and
|
| 14 |
+
# limitations under the License.
|
| 15 |
+
"""
|
| 16 |
+
Refactored Depth Anything 3 CLI
|
| 17 |
+
Clean, modular command-line interface
|
| 18 |
+
"""
|
| 19 |
+
|
| 20 |
+
from __future__ import annotations
|
| 21 |
+
|
| 22 |
+
import os
|
| 23 |
+
import typer
|
| 24 |
+
|
| 25 |
+
from depth_anything_3.services import start_server
|
| 26 |
+
from depth_anything_3.services.gallery import gallery as gallery_main
|
| 27 |
+
from depth_anything_3.services.inference_service import run_inference
|
| 28 |
+
from depth_anything_3.services.input_handlers import (
|
| 29 |
+
ColmapHandler,
|
| 30 |
+
ImageHandler,
|
| 31 |
+
ImagesHandler,
|
| 32 |
+
InputHandler,
|
| 33 |
+
VideoHandler,
|
| 34 |
+
parse_export_feat,
|
| 35 |
+
)
|
| 36 |
+
from depth_anything_3.utils.constants import DEFAULT_EXPORT_DIR, DEFAULT_GALLERY_DIR, DEFAULT_GRADIO_DIR, DEFAULT_MODEL
|
| 37 |
+
|
| 38 |
+
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"
|
| 39 |
+
|
| 40 |
+
app = typer.Typer(help="Depth Anything 3 - Video depth estimation CLI", add_completion=False)
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
# ============================================================================
|
| 44 |
+
# Input type detection utilities
|
| 45 |
+
# ============================================================================
|
| 46 |
+
|
| 47 |
+
# Supported file extensions
|
| 48 |
+
IMAGE_EXTENSIONS = {".png", ".jpg", ".jpeg", ".webp", ".bmp", ".tiff", ".tif"}
|
| 49 |
+
VIDEO_EXTENSIONS = {".mp4", ".avi", ".mov", ".mkv", ".flv", ".wmv", ".webm", ".m4v"}
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
def detect_input_type(input_path: str) -> str:
|
| 53 |
+
"""
|
| 54 |
+
Detect input type from path.
|
| 55 |
+
|
| 56 |
+
Returns:
|
| 57 |
+
- "image": Single image file
|
| 58 |
+
- "images": Directory containing images
|
| 59 |
+
- "video": Video file
|
| 60 |
+
- "colmap": COLMAP directory structure
|
| 61 |
+
- "unknown": Cannot determine type
|
| 62 |
+
"""
|
| 63 |
+
if not os.path.exists(input_path):
|
| 64 |
+
return "unknown"
|
| 65 |
+
|
| 66 |
+
# Check if it's a file
|
| 67 |
+
if os.path.isfile(input_path):
|
| 68 |
+
ext = os.path.splitext(input_path)[1].lower()
|
| 69 |
+
if ext in IMAGE_EXTENSIONS:
|
| 70 |
+
return "image"
|
| 71 |
+
elif ext in VIDEO_EXTENSIONS:
|
| 72 |
+
return "video"
|
| 73 |
+
return "unknown"
|
| 74 |
+
|
| 75 |
+
# Check if it's a directory
|
| 76 |
+
if os.path.isdir(input_path):
|
| 77 |
+
# Check for COLMAP structure
|
| 78 |
+
images_dir = os.path.join(input_path, "images")
|
| 79 |
+
sparse_dir = os.path.join(input_path, "sparse")
|
| 80 |
+
|
| 81 |
+
if os.path.isdir(images_dir) and os.path.isdir(sparse_dir):
|
| 82 |
+
return "colmap"
|
| 83 |
+
|
| 84 |
+
# Check if directory contains image files
|
| 85 |
+
for item in os.listdir(input_path):
|
| 86 |
+
item_path = os.path.join(input_path, item)
|
| 87 |
+
if os.path.isfile(item_path):
|
| 88 |
+
ext = os.path.splitext(item)[1].lower()
|
| 89 |
+
if ext in IMAGE_EXTENSIONS:
|
| 90 |
+
return "images"
|
| 91 |
+
|
| 92 |
+
return "unknown"
|
| 93 |
+
|
| 94 |
+
return "unknown"
|
| 95 |
+
|
| 96 |
+
|
| 97 |
+
# ============================================================================
|
| 98 |
+
# Common parameters and configuration
|
| 99 |
+
# ============================================================================
|
| 100 |
+
|
| 101 |
+
# ============================================================================
|
| 102 |
+
# Inference commands
|
| 103 |
+
# ============================================================================
|
| 104 |
+
|
| 105 |
+
|
| 106 |
+
@app.command()
|
| 107 |
+
def auto(
|
| 108 |
+
input_path: str = typer.Argument(
|
| 109 |
+
..., help="Path to input (image, directory, video, or COLMAP)"
|
| 110 |
+
),
|
| 111 |
+
model_dir: str = typer.Option(DEFAULT_MODEL, help="Model directory path"),
|
| 112 |
+
export_dir: str = typer.Option(DEFAULT_EXPORT_DIR, help="Export directory"),
|
| 113 |
+
export_format: str = typer.Option("glb", help="Export format"),
|
| 114 |
+
device: str = typer.Option("cuda", help="Device to use"),
|
| 115 |
+
use_backend: bool = typer.Option(False, help="Use backend service for inference"),
|
| 116 |
+
backend_url: str = typer.Option(
|
| 117 |
+
"http://localhost:8008", help="Backend URL (default: http://localhost:8008)"
|
| 118 |
+
),
|
| 119 |
+
process_res: int = typer.Option(504, help="Processing resolution"),
|
| 120 |
+
process_res_method: str = typer.Option(
|
| 121 |
+
"upper_bound_resize", help="Processing resolution method"
|
| 122 |
+
),
|
| 123 |
+
export_feat: str = typer.Option(
|
| 124 |
+
"",
|
| 125 |
+
help="[FEAT_VIS]Export features from specified layers using comma-separated indices (e.g., '0,1,2').",
|
| 126 |
+
),
|
| 127 |
+
auto_cleanup: bool = typer.Option(
|
| 128 |
+
False, help="Automatically clean export directory if it exists (no prompt)"
|
| 129 |
+
),
|
| 130 |
+
# Video-specific options
|
| 131 |
+
fps: float = typer.Option(1.0, help="[Video] Sampling FPS for frame extraction"),
|
| 132 |
+
# COLMAP-specific options
|
| 133 |
+
sparse_subdir: str = typer.Option(
|
| 134 |
+
"", help="[COLMAP] Sparse reconstruction subdirectory (e.g., '0' for sparse/0/)"
|
| 135 |
+
),
|
| 136 |
+
align_to_input_ext_scale: bool = typer.Option(
|
| 137 |
+
True, help="[COLMAP] Align prediction to input extrinsics scale"
|
| 138 |
+
),
|
| 139 |
+
# GLB export options
|
| 140 |
+
conf_thresh_percentile: float = typer.Option(
|
| 141 |
+
40.0, help="[GLB] Lower percentile for adaptive confidence threshold"
|
| 142 |
+
),
|
| 143 |
+
num_max_points: int = typer.Option(
|
| 144 |
+
1_000_000, help="[GLB] Maximum number of points in the point cloud"
|
| 145 |
+
),
|
| 146 |
+
show_cameras: bool = typer.Option(
|
| 147 |
+
True, help="[GLB] Show camera wireframes in the exported scene"
|
| 148 |
+
),
|
| 149 |
+
# Feat_vis export options
|
| 150 |
+
feat_vis_fps: int = typer.Option(15, help="[FEAT_VIS] Frame rate for output video"),
|
| 151 |
+
):
|
| 152 |
+
"""
|
| 153 |
+
Automatically detect input type and run appropriate processing.
|
| 154 |
+
|
| 155 |
+
Supports:
|
| 156 |
+
- Single image file (.jpg, .png, etc.)
|
| 157 |
+
- Directory of images
|
| 158 |
+
- Video file (.mp4, .avi, etc.)
|
| 159 |
+
- COLMAP directory (with 'images' and 'sparse' subdirectories)
|
| 160 |
+
"""
|
| 161 |
+
# Detect input type
|
| 162 |
+
input_type = detect_input_type(input_path)
|
| 163 |
+
|
| 164 |
+
if input_type == "unknown":
|
| 165 |
+
typer.echo(f"❌ Error: Cannot determine input type for: {input_path}", err=True)
|
| 166 |
+
typer.echo("Supported inputs:", err=True)
|
| 167 |
+
typer.echo(" - Single image file (.jpg, .png, etc.)", err=True)
|
| 168 |
+
typer.echo(" - Directory containing images", err=True)
|
| 169 |
+
typer.echo(" - Video file (.mp4, .avi, etc.)", err=True)
|
| 170 |
+
typer.echo(" - COLMAP directory (with 'images/' and 'sparse/' subdirectories)", err=True)
|
| 171 |
+
raise typer.Exit(1)
|
| 172 |
+
|
| 173 |
+
# Display detected type
|
| 174 |
+
typer.echo(f"🔍 Detected input type: {input_type.upper()}")
|
| 175 |
+
typer.echo(f"📁 Input path: {input_path}")
|
| 176 |
+
typer.echo()
|
| 177 |
+
|
| 178 |
+
# Determine backend URL based on use_backend flag
|
| 179 |
+
final_backend_url = backend_url if use_backend else None
|
| 180 |
+
|
| 181 |
+
# Parse export_feat parameter
|
| 182 |
+
export_feat_layers = parse_export_feat(export_feat)
|
| 183 |
+
|
| 184 |
+
# Route to appropriate handler
|
| 185 |
+
if input_type == "image":
|
| 186 |
+
typer.echo("Processing single image...")
|
| 187 |
+
# Process input
|
| 188 |
+
image_files = ImageHandler.process(input_path)
|
| 189 |
+
|
| 190 |
+
# Handle export directory
|
| 191 |
+
export_dir = InputHandler.handle_export_dir(export_dir, auto_cleanup)
|
| 192 |
+
|
| 193 |
+
# Run inference
|
| 194 |
+
run_inference(
|
| 195 |
+
image_paths=image_files,
|
| 196 |
+
export_dir=export_dir,
|
| 197 |
+
model_dir=model_dir,
|
| 198 |
+
device=device,
|
| 199 |
+
backend_url=final_backend_url,
|
| 200 |
+
export_format=export_format,
|
| 201 |
+
process_res=process_res,
|
| 202 |
+
process_res_method=process_res_method,
|
| 203 |
+
export_feat_layers=export_feat_layers,
|
| 204 |
+
conf_thresh_percentile=conf_thresh_percentile,
|
| 205 |
+
num_max_points=num_max_points,
|
| 206 |
+
show_cameras=show_cameras,
|
| 207 |
+
feat_vis_fps=feat_vis_fps,
|
| 208 |
+
)
|
| 209 |
+
|
| 210 |
+
elif input_type == "images":
|
| 211 |
+
typer.echo("Processing directory of images...")
|
| 212 |
+
# Process input - use default extensions
|
| 213 |
+
image_files = ImagesHandler.process(input_path, "png,jpg,jpeg")
|
| 214 |
+
|
| 215 |
+
# Handle export directory
|
| 216 |
+
export_dir = InputHandler.handle_export_dir(export_dir, auto_cleanup)
|
| 217 |
+
|
| 218 |
+
# Run inference
|
| 219 |
+
run_inference(
|
| 220 |
+
image_paths=image_files,
|
| 221 |
+
export_dir=export_dir,
|
| 222 |
+
model_dir=model_dir,
|
| 223 |
+
device=device,
|
| 224 |
+
backend_url=final_backend_url,
|
| 225 |
+
export_format=export_format,
|
| 226 |
+
process_res=process_res,
|
| 227 |
+
process_res_method=process_res_method,
|
| 228 |
+
export_feat_layers=export_feat_layers,
|
| 229 |
+
conf_thresh_percentile=conf_thresh_percentile,
|
| 230 |
+
num_max_points=num_max_points,
|
| 231 |
+
show_cameras=show_cameras,
|
| 232 |
+
feat_vis_fps=feat_vis_fps,
|
| 233 |
+
)
|
| 234 |
+
|
| 235 |
+
elif input_type == "video":
|
| 236 |
+
typer.echo(f"Processing video with FPS={fps}...")
|
| 237 |
+
# Handle export directory
|
| 238 |
+
export_dir = InputHandler.handle_export_dir(export_dir, auto_cleanup)
|
| 239 |
+
|
| 240 |
+
# Process input
|
| 241 |
+
image_files = VideoHandler.process(input_path, export_dir, fps)
|
| 242 |
+
|
| 243 |
+
# Run inference
|
| 244 |
+
run_inference(
|
| 245 |
+
image_paths=image_files,
|
| 246 |
+
export_dir=export_dir,
|
| 247 |
+
model_dir=model_dir,
|
| 248 |
+
device=device,
|
| 249 |
+
backend_url=final_backend_url,
|
| 250 |
+
export_format=export_format,
|
| 251 |
+
process_res=process_res,
|
| 252 |
+
process_res_method=process_res_method,
|
| 253 |
+
export_feat_layers=export_feat_layers,
|
| 254 |
+
conf_thresh_percentile=conf_thresh_percentile,
|
| 255 |
+
num_max_points=num_max_points,
|
| 256 |
+
show_cameras=show_cameras,
|
| 257 |
+
feat_vis_fps=feat_vis_fps,
|
| 258 |
+
)
|
| 259 |
+
|
| 260 |
+
elif input_type == "colmap":
|
| 261 |
+
typer.echo(
|
| 262 |
+
f"Processing COLMAP directory (sparse subdirectory: '{sparse_subdir or 'default'}')..."
|
| 263 |
+
)
|
| 264 |
+
# Process input
|
| 265 |
+
image_files, extrinsics, intrinsics = ColmapHandler.process(input_path, sparse_subdir)
|
| 266 |
+
|
| 267 |
+
# Handle export directory
|
| 268 |
+
export_dir = InputHandler.handle_export_dir(export_dir, auto_cleanup)
|
| 269 |
+
|
| 270 |
+
# Run inference
|
| 271 |
+
run_inference(
|
| 272 |
+
image_paths=image_files,
|
| 273 |
+
export_dir=export_dir,
|
| 274 |
+
model_dir=model_dir,
|
| 275 |
+
device=device,
|
| 276 |
+
backend_url=final_backend_url,
|
| 277 |
+
export_format=export_format,
|
| 278 |
+
process_res=process_res,
|
| 279 |
+
process_res_method=process_res_method,
|
| 280 |
+
export_feat_layers=export_feat_layers,
|
| 281 |
+
extrinsics=extrinsics,
|
| 282 |
+
intrinsics=intrinsics,
|
| 283 |
+
align_to_input_ext_scale=align_to_input_ext_scale,
|
| 284 |
+
conf_thresh_percentile=conf_thresh_percentile,
|
| 285 |
+
num_max_points=num_max_points,
|
| 286 |
+
show_cameras=show_cameras,
|
| 287 |
+
feat_vis_fps=feat_vis_fps,
|
| 288 |
+
)
|
| 289 |
+
|
| 290 |
+
typer.echo()
|
| 291 |
+
typer.echo("✅ Processing completed successfully!")
|
| 292 |
+
|
| 293 |
+
|
| 294 |
+
@app.command()
|
| 295 |
+
def image(
|
| 296 |
+
image_path: str = typer.Argument(..., help="Path to input image file"),
|
| 297 |
+
model_dir: str = typer.Option(DEFAULT_MODEL, help="Model directory path"),
|
| 298 |
+
export_dir: str = typer.Option(DEFAULT_EXPORT_DIR, help="Export directory"),
|
| 299 |
+
export_format: str = typer.Option("glb", help="Export format"),
|
| 300 |
+
device: str = typer.Option("cuda", help="Device to use"),
|
| 301 |
+
use_backend: bool = typer.Option(False, help="Use backend service for inference"),
|
| 302 |
+
backend_url: str = typer.Option(
|
| 303 |
+
"http://localhost:8008", help="Backend URL (default: http://localhost:8008)"
|
| 304 |
+
),
|
| 305 |
+
process_res: int = typer.Option(504, help="Processing resolution"),
|
| 306 |
+
process_res_method: str = typer.Option(
|
| 307 |
+
"upper_bound_resize", help="Processing resolution method"
|
| 308 |
+
),
|
| 309 |
+
export_feat: str = typer.Option(
|
| 310 |
+
"",
|
| 311 |
+
help="[FEAT_VIS] Export features from specified layers using comma-separated indices (e.g., '0,1,2').",
|
| 312 |
+
),
|
| 313 |
+
auto_cleanup: bool = typer.Option(
|
| 314 |
+
False, help="Automatically clean export directory if it exists (no prompt)"
|
| 315 |
+
),
|
| 316 |
+
# GLB export options
|
| 317 |
+
conf_thresh_percentile: float = typer.Option(
|
| 318 |
+
40.0, help="[GLB] Lower percentile for adaptive confidence threshold"
|
| 319 |
+
),
|
| 320 |
+
num_max_points: int = typer.Option(
|
| 321 |
+
1_000_000, help="[GLB] Maximum number of points in the point cloud"
|
| 322 |
+
),
|
| 323 |
+
show_cameras: bool = typer.Option(
|
| 324 |
+
True, help="[GLB] Show camera wireframes in the exported scene"
|
| 325 |
+
),
|
| 326 |
+
# Feat_vis export options
|
| 327 |
+
feat_vis_fps: int = typer.Option(15, help="[FEAT_VIS] Frame rate for output video"),
|
| 328 |
+
):
|
| 329 |
+
"""Run camera pose and depth estimation on a single image."""
|
| 330 |
+
# Process input
|
| 331 |
+
image_files = ImageHandler.process(image_path)
|
| 332 |
+
|
| 333 |
+
# Handle export directory
|
| 334 |
+
export_dir = InputHandler.handle_export_dir(export_dir, auto_cleanup)
|
| 335 |
+
|
| 336 |
+
# Parse export_feat parameter
|
| 337 |
+
export_feat_layers = parse_export_feat(export_feat)
|
| 338 |
+
|
| 339 |
+
# Determine backend URL based on use_backend flag
|
| 340 |
+
final_backend_url = backend_url if use_backend else None
|
| 341 |
+
|
| 342 |
+
# Run inference
|
| 343 |
+
run_inference(
|
| 344 |
+
image_paths=image_files,
|
| 345 |
+
export_dir=export_dir,
|
| 346 |
+
model_dir=model_dir,
|
| 347 |
+
device=device,
|
| 348 |
+
backend_url=final_backend_url,
|
| 349 |
+
export_format=export_format,
|
| 350 |
+
process_res=process_res,
|
| 351 |
+
process_res_method=process_res_method,
|
| 352 |
+
export_feat_layers=export_feat_layers,
|
| 353 |
+
conf_thresh_percentile=conf_thresh_percentile,
|
| 354 |
+
num_max_points=num_max_points,
|
| 355 |
+
show_cameras=show_cameras,
|
| 356 |
+
feat_vis_fps=feat_vis_fps,
|
| 357 |
+
)
|
| 358 |
+
|
| 359 |
+
|
| 360 |
+
@app.command()
|
| 361 |
+
def images(
|
| 362 |
+
images_dir: str = typer.Argument(..., help="Path to directory containing input images"),
|
| 363 |
+
image_extensions: str = typer.Option(
|
| 364 |
+
"png,jpg,jpeg", help="Comma-separated image file extensions to process"
|
| 365 |
+
),
|
| 366 |
+
model_dir: str = typer.Option(DEFAULT_MODEL, help="Model directory path"),
|
| 367 |
+
export_dir: str = typer.Option(DEFAULT_EXPORT_DIR, help="Export directory"),
|
| 368 |
+
export_format: str = typer.Option("glb", help="Export format"),
|
| 369 |
+
device: str = typer.Option("cuda", help="Device to use"),
|
| 370 |
+
use_backend: bool = typer.Option(False, help="Use backend service for inference"),
|
| 371 |
+
backend_url: str = typer.Option(
|
| 372 |
+
"http://localhost:8008", help="Backend URL (default: http://localhost:8008)"
|
| 373 |
+
),
|
| 374 |
+
process_res: int = typer.Option(504, help="Processing resolution"),
|
| 375 |
+
process_res_method: str = typer.Option(
|
| 376 |
+
"upper_bound_resize", help="Processing resolution method"
|
| 377 |
+
),
|
| 378 |
+
export_feat: str = typer.Option(
|
| 379 |
+
"",
|
| 380 |
+
help="[FEAT_VIS] Export features from specified layers using comma-separated indices (e.g., '0,1,2').",
|
| 381 |
+
),
|
| 382 |
+
auto_cleanup: bool = typer.Option(
|
| 383 |
+
False, help="Automatically clean export directory if it exists (no prompt)"
|
| 384 |
+
),
|
| 385 |
+
# GLB export options
|
| 386 |
+
conf_thresh_percentile: float = typer.Option(
|
| 387 |
+
40.0, help="[GLB] Lower percentile for adaptive confidence threshold"
|
| 388 |
+
),
|
| 389 |
+
num_max_points: int = typer.Option(
|
| 390 |
+
1_000_000, help="[GLB] Maximum number of points in the point cloud"
|
| 391 |
+
),
|
| 392 |
+
show_cameras: bool = typer.Option(
|
| 393 |
+
True, help="[GLB] Show camera wireframes in the exported scene"
|
| 394 |
+
),
|
| 395 |
+
# Feat_vis export options
|
| 396 |
+
feat_vis_fps: int = typer.Option(15, help="[FEAT_VIS] Frame rate for output video"),
|
| 397 |
+
):
|
| 398 |
+
"""Run camera pose and depth estimation on a directory of images."""
|
| 399 |
+
# Process input
|
| 400 |
+
image_files = ImagesHandler.process(images_dir, image_extensions)
|
| 401 |
+
|
| 402 |
+
# Handle export directory
|
| 403 |
+
export_dir = InputHandler.handle_export_dir(export_dir, auto_cleanup)
|
| 404 |
+
|
| 405 |
+
# Parse export_feat parameter
|
| 406 |
+
export_feat_layers = parse_export_feat(export_feat)
|
| 407 |
+
|
| 408 |
+
# Determine backend URL based on use_backend flag
|
| 409 |
+
final_backend_url = backend_url if use_backend else None
|
| 410 |
+
|
| 411 |
+
# Run inference
|
| 412 |
+
run_inference(
|
| 413 |
+
image_paths=image_files,
|
| 414 |
+
export_dir=export_dir,
|
| 415 |
+
model_dir=model_dir,
|
| 416 |
+
device=device,
|
| 417 |
+
backend_url=final_backend_url,
|
| 418 |
+
export_format=export_format,
|
| 419 |
+
process_res=process_res,
|
| 420 |
+
process_res_method=process_res_method,
|
| 421 |
+
export_feat_layers=export_feat_layers,
|
| 422 |
+
conf_thresh_percentile=conf_thresh_percentile,
|
| 423 |
+
num_max_points=num_max_points,
|
| 424 |
+
show_cameras=show_cameras,
|
| 425 |
+
feat_vis_fps=feat_vis_fps,
|
| 426 |
+
)
|
| 427 |
+
|
| 428 |
+
|
| 429 |
+
@app.command()
def colmap(
    colmap_dir: str = typer.Argument(
        ..., help="Path to COLMAP directory containing 'images' and 'sparse' subdirectories"
    ),
    sparse_subdir: str = typer.Option(
        "", help="Sparse reconstruction subdirectory (e.g., '0' for sparse/0/, empty for sparse/)"
    ),
    align_to_input_ext_scale: bool = typer.Option(
        True, help="Align prediction to input extrinsics scale"
    ),
    model_dir: str = typer.Option(DEFAULT_MODEL, help="Model directory path"),
    export_dir: str = typer.Option(DEFAULT_EXPORT_DIR, help="Export directory"),
    export_format: str = typer.Option("glb", help="Export format"),
    device: str = typer.Option("cuda", help="Device to use"),
    use_backend: bool = typer.Option(False, help="Use backend service for inference"),
    backend_url: str = typer.Option(
        "http://localhost:8008", help="Backend URL (default: http://localhost:8008)"
    ),
    process_res: int = typer.Option(504, help="Processing resolution"),
    process_res_method: str = typer.Option(
        "upper_bound_resize", help="Processing resolution method"
    ),
    export_feat: str = typer.Option(
        "",
        help="Export features from specified layers using comma-separated indices (e.g., '0,1,2').",
    ),
    auto_cleanup: bool = typer.Option(
        False, help="Automatically clean export directory if it exists (no prompt)"
    ),
    # GLB export options
    conf_thresh_percentile: float = typer.Option(
        40.0, help="[GLB] Lower percentile for adaptive confidence threshold"
    ),
    num_max_points: int = typer.Option(
        1_000_000, help="[GLB] Maximum number of points in the point cloud"
    ),
    show_cameras: bool = typer.Option(
        True, help="[GLB] Show camera wireframes in the exported scene"
    ),
    # Feat_vis export options
    feat_vis_fps: int = typer.Option(15, help="[FEAT_VIS] Frame rate for output video"),
):
    """Run pose-conditioned depth estimation on COLMAP data."""
    # Process input
    image_files, extrinsics, intrinsics = ColmapHandler.process(colmap_dir, sparse_subdir)

    # Handle export directory
    export_dir = InputHandler.handle_export_dir(export_dir, auto_cleanup)

    # Parse export_feat parameter
    export_feat_layers = parse_export_feat(export_feat)

    # Determine backend URL based on use_backend flag
    final_backend_url = backend_url if use_backend else None

    # Run inference
    run_inference(
        image_paths=image_files,
        export_dir=export_dir,
        model_dir=model_dir,
        device=device,
        backend_url=final_backend_url,
        export_format=export_format,
        process_res=process_res,
        process_res_method=process_res_method,
        export_feat_layers=export_feat_layers,
        extrinsics=extrinsics,
        intrinsics=intrinsics,
        align_to_input_ext_scale=align_to_input_ext_scale,
        conf_thresh_percentile=conf_thresh_percentile,
        num_max_points=num_max_points,
        show_cameras=show_cameras,
        feat_vis_fps=feat_vis_fps,
    )

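`ColmapHandler.process` returns the image list together with per-image camera parameters that are forwarded to `run_inference`. A hedged shape check, assuming one extrinsic matrix and one 3×3 intrinsic matrix per registered image (the exact conventions — w2c vs. c2w, 3×4 vs. 4×4 — are not established by this excerpt):

```python
import numpy as np

# Hypothetical sanity check for pose-conditioned input; conventions are assumed.
def check_colmap_inputs(image_files, extrinsics, intrinsics):
    n = len(image_files)
    extrinsics = np.asarray(extrinsics)
    intrinsics = np.asarray(intrinsics)
    assert extrinsics.shape in ((n, 3, 4), (n, 4, 4))  # one pose per image
    assert intrinsics.shape == (n, 3, 3)               # one pinhole K per image
```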
@app.command()
def video(
    video_path: str = typer.Argument(..., help="Path to input video file"),
    fps: float = typer.Option(1.0, help="Sampling FPS for frame extraction"),
    model_dir: str = typer.Option(DEFAULT_MODEL, help="Model directory path"),
    export_dir: str = typer.Option(DEFAULT_EXPORT_DIR, help="Export directory"),
    export_format: str = typer.Option("glb", help="Export format"),
    device: str = typer.Option("cuda", help="Device to use"),
    use_backend: bool = typer.Option(False, help="Use backend service for inference"),
    backend_url: str = typer.Option(
        "http://localhost:8008", help="Backend URL (default: http://localhost:8008)"
    ),
    process_res: int = typer.Option(504, help="Processing resolution"),
    process_res_method: str = typer.Option(
        "upper_bound_resize", help="Processing resolution method"
    ),
    export_feat: str = typer.Option(
        "",
        help="[FEAT_VIS] Export features from specified layers using comma-separated indices (e.g., '0,1,2').",
    ),
    auto_cleanup: bool = typer.Option(
        False, help="Automatically clean export directory if it exists (no prompt)"
    ),
    # GLB export options
    conf_thresh_percentile: float = typer.Option(
        40.0, help="[GLB] Lower percentile for adaptive confidence threshold"
    ),
    num_max_points: int = typer.Option(
        1_000_000, help="[GLB] Maximum number of points in the point cloud"
    ),
    show_cameras: bool = typer.Option(
        True, help="[GLB] Show camera wireframes in the exported scene"
    ),
    # Feat_vis export options
    feat_vis_fps: int = typer.Option(15, help="[FEAT_VIS] Frame rate for output video"),
):
    """Run depth estimation on video by extracting frames and processing them."""
    # Handle export directory
    export_dir = InputHandler.handle_export_dir(export_dir, auto_cleanup)

    # Process input
    image_files = VideoHandler.process(video_path, export_dir, fps)

    # Parse export_feat parameter
    export_feat_layers = parse_export_feat(export_feat)

    # Determine backend URL based on use_backend flag
    final_backend_url = backend_url if use_backend else None

    # Run inference
    run_inference(
        image_paths=image_files,
        export_dir=export_dir,
        model_dir=model_dir,
        device=device,
        backend_url=final_backend_url,
        export_format=export_format,
        process_res=process_res,
        process_res_method=process_res_method,
        export_feat_layers=export_feat_layers,
        conf_thresh_percentile=conf_thresh_percentile,
        num_max_points=num_max_points,
        show_cameras=show_cameras,
        feat_vis_fps=feat_vis_fps,
    )

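`VideoHandler.process` samples frames from the video at the requested `fps` before running the image pipeline. A minimal sketch of that kind of sampling, assuming OpenCV is available (the repo's actual extraction logic is not shown in this excerpt):

```python
import os
import cv2  # assumed dependency; the repo's VideoHandler may use something else

def extract_frames_sketch(video_path: str, out_dir: str, fps: float) -> list[str]:
    cap = cv2.VideoCapture(video_path)
    native_fps = cap.get(cv2.CAP_PROP_FPS) or 30.0
    step = max(1, round(native_fps / fps))  # keep every `step`-th frame
    paths, idx = [], 0
    while True:
        ok, frame = cap.read()
        if not ok:
            break
        if idx % step == 0:
            path = os.path.join(out_dir, f"frame_{idx:06d}.png")
            cv2.imwrite(path, frame)
            paths.append(path)
        idx += 1
    cap.release()
    return paths
```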
# ============================================================================
# Service management commands
# ============================================================================


@app.command()
def backend(
    model_dir: str = typer.Option(DEFAULT_MODEL, help="Model directory path"),
    device: str = typer.Option("cuda", help="Device to use"),
    host: str = typer.Option("127.0.0.1", help="Host to bind to"),
    port: int = typer.Option(8008, help="Port to bind to"),
    gallery_dir: str = typer.Option(DEFAULT_GALLERY_DIR, help="Gallery directory path (optional)"),
):
    """Start model backend service with integrated gallery."""
    typer.echo("=" * 60)
    typer.echo("🚀 Starting Depth Anything 3 Backend Server")
    typer.echo("=" * 60)
    typer.echo(f"Model directory: {model_dir}")
    typer.echo(f"Device: {device}")

    # Check if gallery directory exists
    if gallery_dir and os.path.exists(gallery_dir):
        typer.echo(f"Gallery directory: {gallery_dir}")
    else:
        gallery_dir = None  # Disable gallery if directory doesn't exist

    typer.echo()
    typer.echo("📡 Server URLs (Ctrl/CMD+Click to open):")
    typer.echo(f"  🏠 Home: http://{host}:{port}")
    typer.echo(f"  📊 Dashboard: http://{host}:{port}/dashboard")
    typer.echo(f"  📈 API Status: http://{host}:{port}/status")

    if gallery_dir:
        typer.echo(f"  🎨 Gallery: http://{host}:{port}/gallery/")

    typer.echo("=" * 60)

    try:
        start_server(model_dir, device, host, port, gallery_dir)
    except KeyboardInterrupt:
        typer.echo("\n👋 Backend server stopped.")
    except Exception as e:
        typer.echo(f"❌ Failed to start backend: {e}")
        raise typer.Exit(1)

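The banner above advertises a `/status` endpoint; a minimal reachability check from the client side (the response schema is not shown in this commit, so only the HTTP status is used):

```python
import requests  # assumed available in the client environment

def backend_is_up(host: str = "127.0.0.1", port: int = 8008) -> bool:
    try:
        resp = requests.get(f"http://{host}:{port}/status", timeout=5)
        return resp.ok  # payload schema is backend-specific; only reachability is checked
    except requests.RequestException:
        return False
```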
# ============================================================================
# Application launch commands
# ============================================================================


@app.command()
def gradio(
    model_dir: str = typer.Option(DEFAULT_MODEL, help="Model directory path"),
    workspace_dir: str = typer.Option(DEFAULT_GRADIO_DIR, help="Workspace directory path"),
    gallery_dir: str = typer.Option(DEFAULT_GALLERY_DIR, help="Gallery directory path"),
    host: str = typer.Option("127.0.0.1", help="Host address to bind to"),
    port: int = typer.Option(7860, help="Port number to bind to"),
    share: bool = typer.Option(False, help="Create a public link for the app"),
    debug: bool = typer.Option(False, help="Enable debug mode"),
    cache_examples: bool = typer.Option(
        False, help="Pre-cache all example scenes at startup for faster loading"
    ),
    cache_gs_tag: str = typer.Option(
        "",
        help="Tag to match scene names for high-res+3DGS caching (e.g., 'dl3dv'). Scenes containing this tag will use high_res and infer_gs=True; others will use low_res only.",
    ),
):
    """Launch the Depth Anything 3 Gradio interactive web application."""
    from depth_anything_3.app.gradio_app import DepthAnything3App

    # Create necessary directories
    os.makedirs(workspace_dir, exist_ok=True)
    os.makedirs(gallery_dir, exist_ok=True)

    typer.echo("Launching Depth Anything 3 Gradio application...")
    typer.echo(f"Model directory: {model_dir}")
    typer.echo(f"Workspace directory: {workspace_dir}")
    typer.echo(f"Gallery directory: {gallery_dir}")
    typer.echo(f"Host: {host}")
    typer.echo(f"Port: {port}")
    typer.echo(f"Share: {share}")
    typer.echo(f"Debug mode: {debug}")
    typer.echo(f"Cache examples: {cache_examples}")
    if cache_examples:
        if cache_gs_tag:
            typer.echo(
                f"Cache GS Tag: '{cache_gs_tag}' (scenes matching this tag will use high-res + 3DGS)"
            )
        else:
            typer.echo("Cache GS Tag: None (all scenes will use low-res only)")

    try:
        # Initialize and launch application
        app = DepthAnything3App(
            model_dir=model_dir, workspace_dir=workspace_dir, gallery_dir=gallery_dir
        )

        # Pre-cache examples if requested
        if cache_examples:
            typer.echo("\n" + "=" * 60)
            typer.echo("Pre-caching mode enabled")
            if cache_gs_tag:
                typer.echo(f"Scenes containing '{cache_gs_tag}' will use HIGH-RES + 3DGS")
                typer.echo("Other scenes will use LOW-RES only")
            else:
                typer.echo("All scenes will use LOW-RES only")
            typer.echo("=" * 60)
            app.cache_examples(
                show_cam=True,
                filter_black_bg=False,
                filter_white_bg=False,
                save_percentage=20.0,
                num_max_points=1000,
                cache_gs_tag=cache_gs_tag,
                gs_trj_mode="smooth",
                gs_video_quality="low",
            )

        # Prepare launch arguments
        launch_kwargs = {"share": share, "debug": debug}

        app.launch(host=host, port=port, **launch_kwargs)

    except KeyboardInterrupt:
        typer.echo("\nGradio application stopped.")
    except Exception as e:
        typer.echo(f"Failed to launch Gradio application: {e}")
        raise typer.Exit(1)

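The `cache_gs_tag` help text says scenes "containing this tag" get the high-res + 3DGS path. A plausible reading of that matching rule as a sketch (the actual check lives in `DepthAnything3App.cache_examples` and may differ, e.g. in case sensitivity):

```python
# Hypothetical per-scene decision mirroring the documented substring rule.
def wants_high_res_gs(scene_name: str, cache_gs_tag: str) -> bool:
    # Empty tag (the default) -> no scene qualifies; everything stays low-res.
    return bool(cache_gs_tag) and cache_gs_tag in scene_name

assert wants_high_res_gs("dl3dv_scene_001", "dl3dv") is True
assert wants_high_res_gs("indoor_scene_002", "dl3dv") is False
```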
@app.command()
def gallery(
    gallery_dir: str = typer.Option(DEFAULT_GALLERY_DIR, help="Gallery root directory"),
    host: str = typer.Option("127.0.0.1", help="Host address to bind to"),
    port: int = typer.Option(8007, help="Port number to bind to"),
    open_browser: bool = typer.Option(False, help="Open browser after launch"),
):
    """Launch Depth Anything 3 Gallery server"""

    # Validate gallery directory
    if not os.path.exists(gallery_dir):
        raise typer.BadParameter(f"Gallery directory not found: {gallery_dir}")

    typer.echo("Launching Depth Anything 3 Gallery server...")
    typer.echo(f"Gallery directory: {gallery_dir}")
    typer.echo(f"Host: {host}")
    typer.echo(f"Port: {port}")
    typer.echo(f"Auto-open browser: {open_browser}")

    try:
        # Set command line arguments
        import sys

        sys.argv = ["gallery", "--dir", gallery_dir, "--host", host, "--port", str(port)]
        if open_browser:
            sys.argv.append("--open")

        # Launch gallery server
        gallery_main()

    except KeyboardInterrupt:
        typer.echo("\nGallery server stopped.")
    except Exception as e:
        typer.echo(f"Failed to launch Gallery server: {e}")
        raise typer.Exit(1)


if __name__ == "__main__":
    app()
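For a quick smoke test of the CLI defined above, Typer's test runner can invoke commands in-process; a sketch assuming this module is importable as `depth_anything_3.cli` (consistent with this repo's layout):

```python
from typer.testing import CliRunner

from depth_anything_3.cli import app  # module path assumed from this repo's layout

runner = CliRunner()
result = runner.invoke(app, ["images", "--help"])  # list the options shown above
print(result.stdout)
```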
src/depth_anything_3/configs/da3-base.yaml
ADDED
@@ -0,0 +1,45 @@
__object__:
  path: depth_anything_3.model.da3
  name: DepthAnything3Net
  args: as_params

net:
  __object__:
    path: depth_anything_3.model.dinov2.dinov2
    name: DinoV2
    args: as_params

  name: vitb
  out_layers: [5, 7, 9, 11]
  alt_start: 4
  qknorm_start: 4
  rope_start: 4
  cat_token: True

head:
  __object__:
    path: depth_anything_3.model.dualdpt
    name: DualDPT
    args: as_params

  dim_in: &head_dim_in 1536
  output_dim: 2
  features: &head_features 128
  out_channels: &head_out_channels [96, 192, 384, 768]


cam_enc:
  __object__:
    path: depth_anything_3.model.cam_enc
    name: CameraEnc
    args: as_params

  dim_out: 768

cam_dec:
  __object__:
    path: depth_anything_3.model.cam_dec
    name: CameraDec
    args: as_params

  dim_in: 1536
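These configs follow an `__object__` convention: `path` names a module, `name` a class in it, and `args: as_params` suggests the sibling keys become constructor arguments. A hedged sketch of such a resolver (the actual loader presumably lives in `depth_anything_3/cfg.py`, added in this commit, and may differ):

```python
import importlib

# Hypothetical resolver for the __object__ convention used by these configs.
def build_from_config(cfg: dict):
    spec = dict(cfg)
    obj = spec.pop("__object__")
    cls = getattr(importlib.import_module(obj["path"]), obj["name"])
    # Recursively build nested __object__ blocks, then pass siblings as kwargs.
    kwargs = {
        k: build_from_config(v) if isinstance(v, dict) and "__object__" in v else v
        for k, v in spec.items()
    }
    return cls(**kwargs)
```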
src/depth_anything_3/configs/da3-giant.yaml
ADDED
@@ -0,0 +1,71 @@
__object__:
  path: depth_anything_3.model.da3
  name: DepthAnything3Net
  args: as_params

net:
  __object__:
    path: depth_anything_3.model.dinov2.dinov2
    name: DinoV2
    args: as_params

  name: vitg
  out_layers: [19, 27, 33, 39]
  alt_start: 13
  qknorm_start: 13
  rope_start: 13
  cat_token: True

head:
  __object__:
    path: depth_anything_3.model.dualdpt
    name: DualDPT
    args: as_params

  dim_in: &head_dim_in 3072
  output_dim: 2
  features: &head_features 256
  out_channels: &head_out_channels [256, 512, 1024, 1024]


cam_enc:
  __object__:
    path: depth_anything_3.model.cam_enc
    name: CameraEnc
    args: as_params

  dim_out: 1536

cam_dec:
  __object__:
    path: depth_anything_3.model.cam_dec
    name: CameraDec
    args: as_params

  dim_in: 3072


gs_head:
  __object__:
    path: depth_anything_3.model.gsdpt
    name: GSDPT
    args: as_params

  dim_in: *head_dim_in
  output_dim: 38  # should align with gs_adapter's setting, for gs params
  features: *head_features
  out_channels: *head_out_channels


gs_adapter:
  __object__:
    path: depth_anything_3.model.gs_adapter
    name: GaussianAdapter
    args: as_params

  sh_degree: 2
  pred_color: false  # predict SH coefficients if false
  pred_offset_depth: true
  pred_offset_xy: true
  gaussian_scale_min: 1e-5
  gaussian_scale_max: 30.0
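A quick consistency check on `output_dim: 38`, under one plausible but unconfirmed parameterization of the per-pixel Gaussian: with `sh_degree: 2` and `pred_color: false`, color takes 3 × (2+1)² = 27 spherical-harmonic coefficients; adding 1 opacity, 3 scales, 4 rotation-quaternion components, 1 depth offset (`pred_offset_depth`), and 2 xy offsets (`pred_offset_xy`) gives 38, matching the comment that the head must align with the adapter.

```python
# Hypothetical breakdown of the 38-dim Gaussian parameter vector (unconfirmed).
sh = 3 * (2 + 1) ** 2  # RGB x (sh_degree + 1)^2 spherical-harmonic coefficients
assert sh + 1 + 3 + 4 + 1 + 2 == 38  # + opacity, scale, quaternion, offsets
```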
src/depth_anything_3/configs/da3-large.yaml
ADDED
@@ -0,0 +1,45 @@
__object__:
  path: depth_anything_3.model.da3
  name: DepthAnything3Net
  args: as_params

net:
  __object__:
    path: depth_anything_3.model.dinov2.dinov2
    name: DinoV2
    args: as_params

  name: vitl
  out_layers: [11, 15, 19, 23]
  alt_start: 8
  qknorm_start: 8
  rope_start: 8
  cat_token: True

head:
  __object__:
    path: depth_anything_3.model.dualdpt
    name: DualDPT
    args: as_params

  dim_in: &head_dim_in 2048
  output_dim: 2
  features: &head_features 256
  out_channels: &head_out_channels [256, 512, 1024, 1024]


cam_enc:
  __object__:
    path: depth_anything_3.model.cam_enc
    name: CameraEnc
    args: as_params

  dim_out: 1024

cam_dec:
  __object__:
    path: depth_anything_3.model.cam_dec
    name: CameraDec
    args: as_params

  dim_in: 2048
src/depth_anything_3/configs/da3-small.yaml
ADDED
@@ -0,0 +1,45 @@
__object__:
  path: depth_anything_3.model.da3
  name: DepthAnything3Net
  args: as_params

net:
  __object__:
    path: depth_anything_3.model.dinov2.dinov2
    name: DinoV2
    args: as_params

  name: vits
  out_layers: [5, 7, 9, 11]
  alt_start: 4
  qknorm_start: 4
  rope_start: 4
  cat_token: True

head:
  __object__:
    path: depth_anything_3.model.dualdpt
    name: DualDPT
    args: as_params

  dim_in: &head_dim_in 768
  output_dim: 2
  features: &head_features 64
  out_channels: &head_out_channels [48, 96, 192, 384]


cam_enc:
  __object__:
    path: depth_anything_3.model.cam_enc
    name: CameraEnc
    args: as_params

  dim_out: 384

cam_dec:
  __object__:
    path: depth_anything_3.model.cam_dec
    name: CameraDec
    args: as_params

  dim_in: 768
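Reading the four variants side by side (all values taken from the configs above):

| config | backbone | out_layers | head dim_in | head features | head out_channels | cam_enc dim_out |
|---|---|---|---|---|---|---|
| da3-small | vits | 5, 7, 9, 11 | 768 | 64 | 48–384 | 384 |
| da3-base | vitb | 5, 7, 9, 11 | 1536 | 128 | 96–768 | 768 |
| da3-large | vitl | 11, 15, 19, 23 | 2048 | 256 | 256–1024 | 1024 |
| da3-giant | vitg | 19, 27, 33, 39 | 3072 | 256 | 256–1024 | 1536 |

In every variant the head's `dim_in` is exactly 2 × `cam_enc.dim_out` (the backbone width), which is consistent with `cat_token: True` concatenating an extra token onto each patch feature — an inference from the configs, not documented here.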
src/depth_anything_3/configs/da3metric-large.yaml
ADDED
@@ -0,0 +1,28 @@
__object__:
  path: depth_anything_3.model.da3
  name: DepthAnything3Net
  args: as_params

net:
  __object__:
    path: depth_anything_3.model.dinov2.dinov2
    name: DinoV2
    args: as_params

  name: vitl
  out_layers: [4, 11, 17, 23]
  alt_start: -1  # -1 means disable
  qknorm_start: -1
  rope_start: -1
  cat_token: False

head:
  __object__:
    path: depth_anything_3.model.dpt
    name: DPT
    args: as_params

  dim_in: 1024
  output_dim: 1
  features: 256
  out_channels: [256, 512, 1024, 1024]
src/depth_anything_3/configs/da3mono-large.yaml
ADDED
@@ -0,0 +1,28 @@
__object__:
  path: depth_anything_3.model.da3
  name: DepthAnything3Net
  args: as_params

net:
  __object__:
    path: depth_anything_3.model.dinov2.dinov2
    name: DinoV2
    args: as_params

  name: vitl
  out_layers: [4, 11, 17, 23]
  alt_start: -1  # -1 means disable
  qknorm_start: -1
  rope_start: -1
  cat_token: False

head:
  __object__:
    path: depth_anything_3.model.dpt
    name: DPT
    args: as_params

  dim_in: 1024
  output_dim: 1
  features: 256
  out_channels: [256, 512, 1024, 1024]
src/depth_anything_3/configs/da3nested-giant-large.yaml
ADDED
@@ -0,0 +1,10 @@
__object__:
  path: depth_anything_3.model.da3
  name: NestedDepthAnything3Net
  args: as_params

anyview:
  __inherit__: depth_anything_3.configs.da3-giant

metric:
  __inherit__: depth_anything_3.configs.da3metric-large
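`__inherit__` evidently pulls in another config by its dotted name, so the nested model composes the giant any-view branch with the metric-depth branch defined above. A hedged sketch of how such a reference might resolve (the real resolver in `depth_anything_3/cfg.py` may also merge sibling keys as overrides):

```python
import importlib.resources
import yaml  # assumed available

# Hypothetical resolver: "depth_anything_3.configs.da3-giant" -> that YAML file.
def resolve_inherit(dotted: str) -> dict:
    pkg, _, stem = dotted.rpartition(".")
    text = importlib.resources.files(pkg).joinpath(f"{stem}.yaml").read_text()
    return yaml.safe_load(text)
```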
src/depth_anything_3/model/__init__.py
ADDED
@@ -0,0 +1,20 @@
# Copyright (c) 2025 ByteDance Ltd. and/or its affiliates
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from depth_anything_3.model.da3 import DepthAnything3Net, NestedDepthAnything3Net

__export__ = [
    NestedDepthAnything3Net,
    DepthAnything3Net,
]
src/depth_anything_3/model/__pycache__/__init__.cpython-311.pyc
ADDED
Binary file (379 Bytes)

src/depth_anything_3/model/__pycache__/cam_dec.cpython-311.pyc
ADDED
Binary file (2.72 kB)

src/depth_anything_3/model/__pycache__/cam_enc.cpython-311.pyc
ADDED
Binary file (3.24 kB)