maxmon
committed on
Commit
·
fe413ad
1
Parent(s):
9078d0a
chore: v0.2
Browse files
app.py
CHANGED
|
@@ -18,12 +18,22 @@ def auto_anno(txt, types_txt, radio, need_trans=False):
|
|
| 18 |
result = f'{txt}\n{result}'
|
| 19 |
return result
|
| 20 |
|
| 21 |
-
input1 = gr.Textbox(lines=3, label="输入原句")
|
| 22 |
-
input2 = gr.Textbox(lines=3, label="输入类别")
|
| 23 |
output = gr.Textbox(label="输出结果")
|
| 24 |
-
radio = gr.Radio(["文本分类", "实体抽取"], label="算法类型")
|
| 25 |
checkbox = gr.Checkbox(label="翻译成中文")
|
| 26 |
|
| 27 |
if __name__ == '__main__':
|
| 28 |
-
demo = gr.Interface(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
demo.launch(share=False)
|
|
|
|
| 18 |
result = f'{txt}\n{result}'
|
| 19 |
return result
|
| 20 |
|
| 21 |
+
input1 = gr.Textbox(lines=3, label="输入原句", value="Hello world!")
|
| 22 |
+
input2 = gr.Textbox(lines=3, label="输入类别", value="友好、不友好")
|
| 23 |
output = gr.Textbox(label="输出结果")
|
| 24 |
+
radio = gr.Radio(["文本分类", "实体抽取"], label="算法类型", value="文本分类")
|
| 25 |
checkbox = gr.Checkbox(label="翻译成中文")
|
| 26 |
|
| 27 |
if __name__ == '__main__':
|
| 28 |
+
demo = gr.Interface(
|
| 29 |
+
fn=auto_anno,
|
| 30 |
+
description='自动标注,使用了openai免费接口,1分钟内只能请求3次,如遇报错请稍后再试,或clone项目到本地后用自己的key替换。如有疑问欢迎联系微信 maqijun123456',
|
| 31 |
+
inputs=[input1, input2, radio, checkbox],
|
| 32 |
+
examples=[
|
| 33 |
+
['前四个月我国外贸进出口同比增长 5.8%', '政治;经济;科技;文化;娱乐;民生;军事;教育;环保;其它', '文本分类', False],
|
| 34 |
+
['There is a cat trapped on the Avenue of Happiness', '地点', '实体抽取', True],
|
| 35 |
+
['联系方式:18812345678,联系地址:幸福大街20号', '手机号、地址', '实体抽取', False],
|
| 36 |
+
],
|
| 37 |
+
outputs=[output]
|
| 38 |
+
)
|
| 39 |
demo.launch(share=False)
|
utils/anno/cls/__pycache__/text_classification.cpython-310.pyc
CHANGED
|
Binary files a/utils/anno/cls/__pycache__/text_classification.cpython-310.pyc and b/utils/anno/cls/__pycache__/text_classification.cpython-310.pyc differ
|
|
|
utils/anno/cls/text_classification.py
CHANGED
|
@@ -8,8 +8,9 @@ from utils.format.txt_2_list import txt_2_list
|
|
| 8 |
# Set up your API key
|
| 9 |
openai.api_key = openai_key
|
| 10 |
|
| 11 |
-
def text_classification(src_txt, type_arr):
|
| 12 |
-
|
|
|
|
| 13 |
# Call the OpenAI API
|
| 14 |
completion = openai.ChatCompletion.create(
|
| 15 |
model="gpt-3.5-turbo",
|
|
@@ -35,11 +36,14 @@ if __name__ == '__main__':
|
|
| 35 |
type_arr_txt = "天气查询、股票查询、其他"
|
| 36 |
type_arr = txt_2_list(type_arr_txt)
|
| 37 |
txts = [
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
'今天天气怎么样',
|
|
|
|
|
|
|
|
|
|
| 42 |
]
|
| 43 |
for txt in txts:
|
| 44 |
-
result = text_classification(txt, type_arr)
|
| 45 |
print(txt, result)
|
|
|
|
| 8 |
# Set up your API key
|
| 9 |
openai.api_key = openai_key
|
| 10 |
|
| 11 |
+
def text_classification(src_txt, type_arr, history=[]):
|
| 12 |
+
history_txt = ''.join([f'输入|```{q}```输出|{a}\n' for q, a in history])
|
| 13 |
+
user = f"你是一个聪明而且有百年经验的文本分类器. 你的任务是从一段文本里面提取出相应的分类结果签。你的回答必须用统一的格式。文本用```符号分割。分类类型保存在一个数组里{type_arr}\n{history_txt}输入|```{src_txt}```输出|"
|
| 14 |
# Call the OpenAI API
|
| 15 |
completion = openai.ChatCompletion.create(
|
| 16 |
model="gpt-3.5-turbo",
|
|
|
|
| 36 |
type_arr_txt = "天气查询、股票查询、其他"
|
| 37 |
type_arr = txt_2_list(type_arr_txt)
|
| 38 |
txts = [
|
| 39 |
+
'这个商品真不错',
|
| 40 |
+
'用着不行',
|
| 41 |
+
'没用过这么好的东西',
|
| 42 |
+
# '今天天气怎么样',
|
| 43 |
+
]
|
| 44 |
+
history = [
|
| 45 |
+
['这个商品真不错', ['其他']],
|
| 46 |
]
|
| 47 |
for txt in txts:
|
| 48 |
+
result = text_classification(txt, type_arr, history)
|
| 49 |
print(txt, result)
|
utils/anno/ner/__pycache__/entity_extract.cpython-310.pyc
CHANGED
|
Binary files a/utils/anno/ner/__pycache__/entity_extract.cpython-310.pyc and b/utils/anno/ner/__pycache__/entity_extract.cpython-310.pyc differ
|
|
|
utils/anno/ner/entity_extract.py
CHANGED
|
@@ -49,8 +49,9 @@ def extract_named_entities(src_txt, type_arr):
|
|
| 49 |
if ready_keys.__contains__(ready_key):
|
| 50 |
continue
|
| 51 |
item['start'] = i
|
| 52 |
-
item['end'] = i + len(item['name'])
|
| 53 |
break
|
|
|
|
|
|
|
| 54 |
# 将在实体类型里的放入结果
|
| 55 |
result.append(item)
|
| 56 |
ready_key = get_ready_key(item['name'], item['type'], item['start'])
|
|
|
|
| 49 |
if ready_keys.__contains__(ready_key):
|
| 50 |
continue
|
| 51 |
item['start'] = i
|
|
|
|
| 52 |
break
|
| 53 |
+
# 确保实体结尾坐标正确
|
| 54 |
+
item['end'] = item['start'] + len(item['name'])
|
| 55 |
# 将在实体类型里的放入结果
|
| 56 |
result.append(item)
|
| 57 |
ready_key = get_ready_key(item['name'], item['type'], item['start'])
|