Commit
·
c9fc95e
1
Parent(s):
c549c79
update handler
Browse files- generic_ner.py +1 -3
- test.py +46 -0
generic_ner.py
CHANGED
|
@@ -253,9 +253,7 @@ class MultitaskTokenClassificationPipeline(Pipeline):
|
|
| 253 |
field_name = entity["entity"].split(".")[
|
| 254 |
-1
|
| 255 |
] # Last part of the label as the field
|
| 256 |
-
|
| 257 |
-
parent_entity[field_name] = []
|
| 258 |
-
parent_entity[field_name].append(entity)
|
| 259 |
nested = True
|
| 260 |
break
|
| 261 |
|
|
|
|
| 253 |
field_name = entity["entity"].split(".")[
|
| 254 |
-1
|
| 255 |
] # Last part of the label as the field
|
| 256 |
+
parent_entity[field_name] = entity["word"]
|
|
|
|
|
|
|
| 257 |
nested = True
|
| 258 |
break
|
| 259 |
|
test.py
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Import necessary modules from the transformers library
|
| 2 |
+
from transformers import pipeline
|
| 3 |
+
from transformers import AutoModelForTokenClassification, AutoTokenizer
|
| 4 |
+
|
| 5 |
+
# Define the model name to be used for token classification; we use the Impresso NER model
|
| 6 |
+
# that can be found at "https://huggingface.co/impresso-project/ner-stacked-bert-multilingual"
|
| 7 |
+
MODEL_NAME = "impresso-project/ner-stacked-bert-multilingual"
|
| 8 |
+
|
| 9 |
+
# Load the tokenizer corresponding to the specified model name
|
| 10 |
+
ner_tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
|
| 11 |
+
|
| 12 |
+
# Build the pipeline for the "generic-ner" task from MODEL_NAME, reusing the
# tokenizer loaded above and running on the CPU.
# NOTE(review): "generic-ner" is presumably a custom task shipped with the
# model repository, which would be why trust_remote_code=True is passed - confirm.
ner_pipeline = pipeline(
|
| 13 |
+
"generic-ner",
|
| 14 |
+
model=MODEL_NAME,
|
| 15 |
+
tokenizer=ner_tokenizer,
|
| 16 |
+
trust_remote_code=True,
|
| 17 |
+
device="cpu",
|
| 18 |
+
)
|
| 19 |
+
sentences = [
|
| 20 |
+
"""In the year 1789, King Louis XVI, ruler of France, convened the Estates-General at the Palace of Versailles,
|
| 21 |
+
where Marie Antoinette, the Queen of France, alongside Maximilien Robespierre, a leading member of the National Assembly,
|
| 22 |
+
debated with Jean-Jacques Rousseau, the famous philosopher, and Charles de Talleyrand, the Bishop of Autun,
|
| 23 |
+
regarding the future of the French monarchy. At the same time, across the Atlantic in Philadelphia,
|
| 24 |
+
George Washington, the first President of the United States, and Thomas Jefferson, the nation's Secretary of State,
|
| 25 |
+
were drafting policies for the newly established American government following the signing of the Constitution."""
|
| 26 |
+
]
|
| 27 |
+
|
| 28 |
+
print(sentences[0])
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
# Helper function to print entities one per row
|
| 32 |
+
def print_nicely(entities):
# Print one line per item in `entities`, showing its 'entity' label, its
# 'score' formatted to two decimals, its whitespace-stripped 'word', and its
# 'start' and 'end' values. Each item must support lookup of those five keys.
# Nothing is returned.
# NOTE(review): 'score' is printed unchanged with a '%' suffix - confirm the
# pipeline already reports it on a 0-100 scale.
|
| 33 |
+
for entity in entities:
|
| 34 |
+
print(
|
| 35 |
+
f"Entity: {entity['entity']} | Confidence: {entity['score']:.2f}% | Text: {entity['word'].strip()} | Start: {entity['start']} | End: {entity['end']}"
|
| 36 |
+
)
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
# Visualize stacked entities for each sentence
|
| 40 |
+
for sentence in sentences:
|
| 41 |
+
# Run the NER pipeline on the sentence; the result is used as a mapping below
results = ner_pipeline(sentence)
|
| 42 |
+
|
| 43 |
+
# Walk every key of the result (presumably one per entity level, e.g. coarse and fine - confirm)
|
| 44 |
+
for key in results.keys():
|
| 45 |
+
# Print the entities stored under this key, one per line
|
| 46 |
+
print_nicely(results[key])
|