make compatible with transformers 4.49+ (#43)
- make compatible with transformers 4.49+ (1a5be979ef53262299363e521777777d1f356869)
Co-authored-by: Blair Chintella <[email protected]>
- modeling_chatglm.py +13 -7
modeling_chatglm.py CHANGED

@@ -1082,19 +1082,22 @@ class ChatGLMForConditionalGeneration(ChatGLMPreTrainedModel):
             outputs: ModelOutput,
             model_kwargs: Dict[str, Any],
             is_encoder_decoder: bool = False,
+            num_new_tokens: int = 1,
     ) -> Dict[str, Any]:
-
-
-
+        for possible_cache_name in ["past_key_values", "mems", "past_buckets_states", "cache_params"]:
+            if hasattr(outputs, possible_cache_name):
+                if possible_cache_name in ("past_buckets_states", "mems"):
+                    cache_name = "past_key_values"
+                else:
+                    cache_name = possible_cache_name
+                model_kwargs[cache_name] = getattr(outputs, possible_cache_name)
+                break
 
-        # update attention mask
         if "attention_mask" in model_kwargs:
             attention_mask = model_kwargs["attention_mask"]
             model_kwargs["attention_mask"] = torch.cat(
                 [attention_mask, attention_mask.new_ones((attention_mask.shape[0], 1))], dim=-1
             )
-
-        # update position ids
         if "position_ids" in model_kwargs:
             position_ids = model_kwargs["position_ids"]
             new_position_id = position_ids[..., -1:].clone()
@@ -1102,8 +1105,11 @@ class ChatGLMForConditionalGeneration(ChatGLMPreTrainedModel):
             model_kwargs["position_ids"] = torch.cat(
                 [position_ids, new_position_id], dim=-1
             )
-
         model_kwargs["is_first_forward"] = False
+
+        if model_kwargs.get("use_cache", True) and "cache_position" in model_kwargs:
+            model_kwargs["cache_position"] = model_kwargs["cache_position"][-1:] + num_new_tokens
+
         return model_kwargs
 
     def prepare_inputs_for_generation(
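For context, the loop introduced above copies whichever cache attribute the model output happens to carry into model_kwargs without going through transformers internals, which is presumably what stopped working under 4.49+. The snippet below is a minimal, self-contained sketch of just that lookup; the attribute names and the normalization to "past_key_values" come from the diff, while the _copy_cache_from_outputs helper and the SimpleNamespace stand-in for the real ModelOutput are purely illustrative.

from types import SimpleNamespace
from typing import Any, Dict

def _copy_cache_from_outputs(outputs: Any, model_kwargs: Dict[str, Any]) -> Dict[str, Any]:
    # Same lookup order as the loop added in this commit: the first cache-like
    # attribute found on the model output is copied into model_kwargs.
    for possible_cache_name in ["past_key_values", "mems", "past_buckets_states", "cache_params"]:
        if hasattr(outputs, possible_cache_name):
            if possible_cache_name in ("past_buckets_states", "mems"):
                # Legacy cache attribute names are normalized to "past_key_values".
                cache_name = "past_key_values"
            else:
                cache_name = possible_cache_name
            model_kwargs[cache_name] = getattr(outputs, possible_cache_name)
            break
    return model_kwargs

# Illustration only: a hypothetical stand-in for the ModelOutput returned by a forward pass.
fake_outputs = SimpleNamespace(past_key_values=("layer0_kv", "layer1_kv"))
print(_copy_cache_from_outputs(fake_outputs, {}))
# {'past_key_values': ('layer0_kv', 'layer1_kv')}

The other addition, the cache_position update guarded by use_cache, advances the cached position index by num_new_tokens on every generation step; this appears to mirror the bookkeeping recent transformers releases perform in GenerationMixin._update_model_kwargs_for_generation, so decoding loops that pass cache_position keep working.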