Writer
/

camel-5b-hf

Text Generation

text-generation-inference

Model card Files Files and versions

kiranr committed on Apr 10, 2023

Commit

41a22ce

·

1 Parent(s): c59532d

Update handler.py

Files changed (1) hide show

handler.py +30 -5

handler.py CHANGED Viewed

@@ -6,22 +6,47 @@ from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
 # 0 when CUDA is available, otherwise -1; handed to the pipeline below as its `device` argument.
 device = 0 if torch.cuda.is_available() else -1
 class EndpointHandler:
     # Pre-change version of the handler: the lines marked "-" are the ones this commit removes.
     def __init__(self, path=""):
         # load the tokenizer and the causal-LM weights from `path`
         tokenizer = AutoTokenizer.from_pretrained(path)
-        model = AutoModelForCausalLM.from_pretrained(path, low_cpu_mem_usage=True)
         # create the text-generation pipeline that __call__ invokes
-        self.pipeline = pipeline("text-generation", model=model, tokenizer=tokenizer, device=device)
     def __call__(self, data: Any) -> List[List[Dict[str, float]]]:
         # "inputs" falls back to the whole payload when the key is absent;
         # both pops mutate the caller's dict.
         inputs = data.pop("inputs", data)
         parameters = data.pop("parameters", None)
         # forward the raw inputs, expanding `parameters` as keyword arguments when provided
         if parameters is not None:
-            prediction = self.pipeline(inputs, **parameters)
         else:
-            prediction = self.pipeline(inputs)
         # the pipeline output is returned unchanged (no post-processing happens here)
-        return prediction

 device = 0 if torch.cuda.is_available() else -1
+format_input = (
+    "Below is an instruction that describes a task. "
+    "Write a response that appropriately completes the request.\n\n"
+    "### Instruction:\n{instruction}\n\n### Response:"
+)
 class EndpointHandler:
     def __init__(self, path=""):
         # load the model
         tokenizer = AutoTokenizer.from_pretrained(path)
+        model = AutoModelForCausalLM.from_pretrained(
+            path,
+            device_map="auto",
+            torch_dtype=torch.float16,
+        )
         # create inference pipeline
+        self.pipeline = pipeline(
+            "text-generation",
+            model=model,
+            tokenizer=tokenizer,
+            device=device,
+            max_length=256,
+        )
     def __call__(self, data: Any) -> List[List[Dict[str, float]]]:
         inputs = data.pop("inputs", data)
         parameters = data.pop("parameters", None)
+        text_input = format_input.format(instruction=inputs)
         # pass inputs with all kwargs in data
         if parameters is not None:
+            prediction = self.pipeline(text_input, **parameters)
         else:
+            prediction = self.pipeline(text_input)
         # postprocess the prediction
+        output = [
+            {"generated_text": pred["generated_text"].split("### Response:")[1].strip()}
+            for pred in prediction
+        ]
+        return output