Spaces:
Runtime error
Runtime error
more captioning tasks
Browse files
- app.py +12 -11
- utils/tasks.py +18 -4
app.py
CHANGED
|
@@ -5,7 +5,8 @@ import spaces
|
|
| 5 |
|
| 6 |
from utils.annotate import annotate_with_boxes
|
| 7 |
from utils.models import load_models, run_inference, CHECKPOINTS
|
| 8 |
-
from utils.tasks import TASK_NAMES, TASKS
|
|
|
|
| 9 |
|
| 10 |
MARKDOWN = """
|
| 11 |
# Better Florence-2 Playground 🔥
|
|
@@ -25,12 +26,12 @@ MARKDOWN = """
|
|
| 25 |
</div>
|
| 26 |
"""
|
| 27 |
|
| 28 |
-
OBJECT_DETECTION_EXAMPLES = [
|
| 29 |
-
|
| 30 |
-
]
|
| 31 |
-
CAPTION_EXAMPLES = [
|
| 32 |
-
|
| 33 |
-
]
|
| 34 |
|
| 35 |
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
| 36 |
MODELS, PROCESSORS = load_models(DEVICE)
|
|
@@ -41,13 +42,13 @@ def process(checkpoint_dropdown, task_dropdown, image_input):
|
|
| 41 |
model = MODELS[checkpoint_dropdown]
|
| 42 |
processor = PROCESSORS[checkpoint_dropdown]
|
| 43 |
task = TASKS[task_dropdown]
|
| 44 |
-
if task_dropdown ==
|
| 45 |
_, response = run_inference(
|
| 46 |
model, processor, DEVICE, image_input, task)
|
| 47 |
detections = sv.Detections.from_lmm(
|
| 48 |
lmm=sv.LMM.FLORENCE_2, result=response, resolution_wh=image_input.size)
|
| 49 |
return annotate_with_boxes(image_input, detections)
|
| 50 |
-
elif task_dropdown
|
| 51 |
_, response = run_inference(
|
| 52 |
model, processor, DEVICE, image_input, task)
|
| 53 |
return response[task]
|
|
@@ -73,7 +74,7 @@ with gr.Blocks() as demo:
|
|
| 73 |
with gr.Column():
|
| 74 |
@gr.render(inputs=task_dropdown_component)
|
| 75 |
def show_output(text):
|
| 76 |
-
if text ==
|
| 77 |
image_output_component = gr.Image(type='pil', label='Image Output')
|
| 78 |
submit_button_component.click(
|
| 79 |
fn=process,
|
|
@@ -84,7 +85,7 @@ with gr.Blocks() as demo:
|
|
| 84 |
],
|
| 85 |
outputs=image_output_component
|
| 86 |
)
|
| 87 |
-
elif text
|
| 88 |
text_output_component = gr.Textbox(label='Caption Output')
|
| 89 |
submit_button_component.click(
|
| 90 |
fn=process,
|
|
|
|
| 5 |
|
| 6 |
from utils.annotate import annotate_with_boxes
|
| 7 |
from utils.models import load_models, run_inference, CHECKPOINTS
|
| 8 |
+
from utils.tasks import TASK_NAMES, TASKS, OBJECT_DETECTION_TASK_NAME, \
|
| 9 |
+
CAPTION_TASK_NAMES
|
| 10 |
|
| 11 |
MARKDOWN = """
|
| 12 |
# Better Florence-2 Playground 🔥
|
|
|
|
| 26 |
</div>
|
| 27 |
"""
|
| 28 |
|
| 29 |
+
# OBJECT_DETECTION_EXAMPLES = [
|
| 30 |
+
# ["microsoft/Florence-2-large-ft", "Object Detection", "https://media.roboflow.com/notebooks/examples/dog-2.jpeg"]
|
| 31 |
+
# ]
|
| 32 |
+
# CAPTION_EXAMPLES = [
|
| 33 |
+
# ["microsoft/Florence-2-large-ft", "Caption", "https://media.roboflow.com/notebooks/examples/dog-2.jpeg"]
|
| 34 |
+
# ]
|
| 35 |
|
| 36 |
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
| 37 |
MODELS, PROCESSORS = load_models(DEVICE)
|
|
|
|
| 42 |
model = MODELS[checkpoint_dropdown]
|
| 43 |
processor = PROCESSORS[checkpoint_dropdown]
|
| 44 |
task = TASKS[task_dropdown]
|
| 45 |
+
if task_dropdown == OBJECT_DETECTION_TASK_NAME:
|
| 46 |
_, response = run_inference(
|
| 47 |
model, processor, DEVICE, image_input, task)
|
| 48 |
detections = sv.Detections.from_lmm(
|
| 49 |
lmm=sv.LMM.FLORENCE_2, result=response, resolution_wh=image_input.size)
|
| 50 |
return annotate_with_boxes(image_input, detections)
|
| 51 |
+
elif task_dropdown in CAPTION_TASK_NAMES:
|
| 52 |
_, response = run_inference(
|
| 53 |
model, processor, DEVICE, image_input, task)
|
| 54 |
return response[task]
|
|
|
|
| 74 |
with gr.Column():
|
| 75 |
@gr.render(inputs=task_dropdown_component)
|
| 76 |
def show_output(text):
|
| 77 |
+
if text == OBJECT_DETECTION_TASK_NAME:
|
| 78 |
image_output_component = gr.Image(type='pil', label='Image Output')
|
| 79 |
submit_button_component.click(
|
| 80 |
fn=process,
|
|
|
|
| 85 |
],
|
| 86 |
outputs=image_output_component
|
| 87 |
)
|
| 88 |
+
elif text in CAPTION_TASK_NAMES:
|
| 89 |
text_output_component = gr.Textbox(label='Caption Output')
|
| 90 |
submit_button_component.click(
|
| 91 |
fn=process,
|
utils/tasks.py
CHANGED
|
@@ -1,8 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
TASK_NAMES = [
|
| 2 |
-
|
| 3 |
-
|
|
|
|
|
|
|
| 4 |
]
|
| 5 |
TASKS = {
|
| 6 |
-
|
| 7 |
-
|
|
|
|
|
|
|
| 8 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
OBJECT_DETECTION_TASK_NAME = "Object Detection"
|
| 2 |
+
CAPTION_TASK_NAME = "Caption"
|
| 3 |
+
DETAILED_CAPTION_TASK_NAME = "Detailed Caption"
|
| 4 |
+
MORE_DETAILED_CAPTION_TASK_NAME = "More Detailed Caption"
|
| 5 |
+
|
| 6 |
TASK_NAMES = [
|
| 7 |
+
OBJECT_DETECTION_TASK_NAME,
|
| 8 |
+
CAPTION_TASK_NAME,
|
| 9 |
+
DETAILED_CAPTION_TASK_NAME,
|
| 10 |
+
MORE_DETAILED_CAPTION_TASK_NAME
|
| 11 |
]
|
| 12 |
TASKS = {
|
| 13 |
+
OBJECT_DETECTION_TASK_NAME: "<OD>",
|
| 14 |
+
CAPTION_TASK_NAME: "<CAPTION>",
|
| 15 |
+
DETAILED_CAPTION_TASK_NAME: "<DETAILED_CAPTION>",
|
| 16 |
+
MORE_DETAILED_CAPTION_TASK_NAME: "<MORE_DETAILED_CAPTION>"
|
| 17 |
}
|
| 18 |
+
CAPTION_TASK_NAMES = [
|
| 19 |
+
CAPTION_TASK_NAME,
|
| 20 |
+
DETAILED_CAPTION_TASK_NAME,
|
| 21 |
+
MORE_DETAILED_CAPTION_TASK_NAME
|
| 22 |
+
]
|