junqiu-dev committed on
Commit
8ccc65f
·
1 Parent(s): 38f51c4

Add YAML metadata to model card

Browse files
Files changed (1) hide show
  1. README.md +16 -1
README.md CHANGED
@@ -1,3 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  # opensearch-semantic-highlighter
2
 
3
  ## Overview
@@ -93,7 +108,7 @@ def prepare_input_features(
93
 
94
  # example highlighting case, from OpenSearch documentation
95
  query = "When does OpenSearch use text reanalysis for highlighting?"
96
- document = "To highlight the search terms, the highlighter needs the start and end character offsets of each term. The offsets mark the terms position in the original text. The highlighter can obtain the offsets from the following sources: Postings: When documents are indexed, OpenSearch creates an inverted search index—a core data structure used to search for documents. Postings represent the inverted search index and store the mapping of each analyzed term to the list of documents in which it occurs. If you set the index_options parameter to offsets when mapping a text field, OpenSearch adds each terms start and end character offsets to the inverted index. During highlighting, the highlighter reruns the original query directly on the postings to locate each term. Thus, storing offsets makes highlighting more efficient for large fields because it does not require reanalyzing the text. Storing term offsets requires additional disk space, but uses less disk space than storing term vectors. Text reanalysis: In the absence of both postings and term vectors, the highlighter reanalyzes text in order to highlight it. For every document and every field that needs highlighting, the highlighter creates a small in-memory index and reruns the original query through Lucenes query execution planner to access low-level match information for the current document. Reanalyzing the text works well in most use cases. However, this method is more memory and time intensive for large fields."
97
 
98
  # sentence-level parsing
99
  sentence_ids = []
 
1
+ ---
2
+ language: en
3
+ license: apache-2.0
4
+ library_name: transformers
5
+ tags:
6
+ - opensearch
7
+ - semantic-search
8
+ - highlighting
9
+ - sentence-highlighter
10
+ - bert
11
+ - text-classification
12
+ - pytorch
13
+ pipeline_tag: text-classification
14
+ ---
15
+
16
  # opensearch-semantic-highlighter
17
 
18
  ## Overview
 
108
 
109
  # example highlighting case, from OpenSearch documentation
110
  query = "When does OpenSearch use text reanalysis for highlighting?"
111
+ document = "To highlight the search terms, the highlighter needs the start and end character offsets of each term. The offsets mark the term's position in the original text. The highlighter can obtain the offsets from the following sources: Postings: When documents are indexed, OpenSearch creates an inverted search index—a core data structure used to search for documents. Postings represent the inverted search index and store the mapping of each analyzed term to the list of documents in which it occurs. If you set the index_options parameter to offsets when mapping a text field, OpenSearch adds each term's start and end character offsets to the inverted index. During highlighting, the highlighter reruns the original query directly on the postings to locate each term. Thus, storing offsets makes highlighting more efficient for large fields because it does not require reanalyzing the text. Storing term offsets requires additional disk space, but uses less disk space than storing term vectors. Text reanalysis: In the absence of both postings and term vectors, the highlighter reanalyzes text in order to highlight it. For every document and every field that needs highlighting, the highlighter creates a small in-memory index and reruns the original query through Lucene's query execution planner to access low-level match information for the current document. Reanalyzing the text works well in most use cases. However, this method is more memory and time intensive for large fields."
112
 
113
  # sentence-level parsing
114
  sentence_ids = []