Upload folder using huggingface_hub

Browse files

Files changed (10) hide show

.gitattributes +1 -0
README.md +165 -0
chat_template.jinja +102 -0
config.json +43 -0
generation_config.json +12 -0
model.safetensors +3 -0
special_tokens_map.json +51 -0
tokenizer.json +3 -0
tokenizer.model +3 -0
tokenizer_config.json +281 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+tokenizer.json filter=lfs diff=lfs merge=lfs -text

README.md ADDED Viewed

	@@ -0,0 +1,165 @@

+---
+library_name: transformers
+pipeline_tag: text-generation
+inference: true
+widget:
+  - text: Hello!
+    example_title: Hello world
+    group: Python
+base_model:
+- baidu/ERNIE-4.5-21B-A3B-Thinking
+---
+This tiny model is intended for debugging. It is randomly initialized using the configuration adapted from [baidu/ERNIE-4.5-21B-A3B-Thinking](https://huggingface.co/baidu/ERNIE-4.5-21B-A3B-Thinking).
+### Example usage:
+```python
+from transformers import AutoModelForCausalLM, AutoTokenizer
+# Load model and tokenizer
+model_id = "yujiepan/ernie-4.5-moe-tiny-random"
+model = AutoModelForCausalLM.from_pretrained(
+    model_id,
+    device_map="auto",
+    torch_dtype="bfloat16",
+    trust_remote_code=True,
+)
+tokenizer = AutoTokenizer.from_pretrained(model_id)
+# Generate answer
+prompt = "What is AI?"
+input_ids = tokenizer.apply_chat_template(
+    [{"role": "user", "content": prompt}],
+    add_generation_prompt=True,
+    return_tensors="pt",
+    tokenize=True,
+).to(model.device)
+output = model.generate(
+    input_ids,
+    do_sample=True,
+    max_new_tokens=32,
+)
+print(tokenizer.decode(output[0], skip_special_tokens=False))
+```
+### Code to create this repo:
+```python
+import json
+from pathlib import Path
+import accelerate
+import torch
+from huggingface_hub import file_exists, hf_hub_download
+from transformers import (
+    AutoConfig,
+    AutoModelForCausalLM,
+    AutoProcessor,
+    GenerationConfig,
+    set_seed,
+)
+source_model_id = "baidu/ERNIE-4.5-21B-A3B-Thinking"
+save_folder = "/tmp/yujiepan/ernie-4.5-moe-tiny-random"
+processor = AutoProcessor.from_pretrained(source_model_id, trust_remote_code=True)
+processor.save_pretrained(save_folder)
+with open(hf_hub_download(source_model_id, filename='config.json', repo_type='model'), 'r', encoding='utf-8') as f:
+    config_json = json.load(f)
+config_json['hidden_size'] = 8
+config_json['intermediate_size'] = 32
+config_json['moe_intermediate_size'] = 32
+# config_json['moe_k'] = 6
+config_json['head_dim'] = 32
+config_json['num_attention_heads'] = 16
+config_json['num_hidden_layers'] = 2
+config_json['num_key_value_heads'] = 8
+config_json['tie_word_embeddings'] = True
+config_json['use_cache'] = True
+with open(f"{save_folder}/config.json", "w", encoding='utf-8') as f:
+    json.dump(config_json, f, indent=2)
+config = AutoConfig.from_pretrained(
+    save_folder,
+    trust_remote_code=True,
+)
+print(config)
+torch.set_default_dtype(torch.bfloat16)
+model = AutoModelForCausalLM.from_config(config)
+torch.set_default_dtype(torch.float32)
+if file_exists(filename="generation_config.json", repo_id=source_model_id, repo_type='model'):
+    model.generation_config = GenerationConfig.from_pretrained(
+        source_model_id, trust_remote_code=True,
+    )
+    model.generation_config.do_sample = True
+    print(model.generation_config)
+model = model.cpu()
+with torch.no_grad():
+    for name, p in sorted(model.named_parameters()):
+        torch.nn.init.normal_(p, 0, 0.1)
+        print(name, p.shape)
+model.save_pretrained(save_folder)
+print(model)
+```
+### Printing the model:
+```text
+Ernie4_5_MoeForCausalLM(
+  (model): Ernie4_5_MoeModel(
+    (embed_tokens): Embedding(103424, 8, padding_idx=0)
+    (layers): ModuleList(
+      (0): Ernie4_5_MoeDecoderLayer(
+        (self_attn): Ernie4_5_MoeAttention(
+          (q_proj): Linear(in_features=8, out_features=512, bias=False)
+          (k_proj): Linear(in_features=8, out_features=256, bias=False)
+          (v_proj): Linear(in_features=8, out_features=256, bias=False)
+          (o_proj): Linear(in_features=512, out_features=8, bias=False)
+        )
+        (mlp): Ernie4_5_MoeMLP(
+          (gate_proj): Linear(in_features=8, out_features=32, bias=False)
+          (up_proj): Linear(in_features=8, out_features=32, bias=False)
+          (down_proj): Linear(in_features=32, out_features=8, bias=False)
+          (act_fn): SiLU()
+        )
+        (input_layernorm): Ernie4_5_MoeRMSNorm((8,), eps=1e-05)
+        (post_attention_layernorm): Ernie4_5_MoeRMSNorm((8,), eps=1e-05)
+      )
+      (1): Ernie4_5_MoeDecoderLayer(
+        (self_attn): Ernie4_5_MoeAttention(
+          (q_proj): Linear(in_features=8, out_features=512, bias=False)
+          (k_proj): Linear(in_features=8, out_features=256, bias=False)
+          (v_proj): Linear(in_features=8, out_features=256, bias=False)
+          (o_proj): Linear(in_features=512, out_features=8, bias=False)
+        )
+        (mlp): Ernie4_5_MoeSparseMoeBlock(
+          (moe_statics): Ernie4_5_MoeStatics()
+          (gate): Linear(in_features=8, out_features=64, bias=False)
+          (experts): ModuleList(
+            (0-63): 64 x Ernie4_5_MoeMLP(
+              (gate_proj): Linear(in_features=8, out_features=32, bias=False)
+              (up_proj): Linear(in_features=8, out_features=32, bias=False)
+              (down_proj): Linear(in_features=32, out_features=8, bias=False)
+              (act_fn): SiLU()
+            )
+          )
+          (shared_experts): Ernie4_5_MoeMLP(
+            (gate_proj): Linear(in_features=8, out_features=64, bias=False)
+            (up_proj): Linear(in_features=8, out_features=64, bias=False)
+            (down_proj): Linear(in_features=64, out_features=8, bias=False)
+            (act_fn): SiLU()
+          )
+        )
+        (input_layernorm): Ernie4_5_MoeRMSNorm((8,), eps=1e-05)
+        (post_attention_layernorm): Ernie4_5_MoeRMSNorm((8,), eps=1e-05)
+      )
+    )
+    (norm): Ernie4_5_MoeRMSNorm((8,), eps=1e-05)
+    (rotary_emb): Ernie4_5_MoeRotaryEmbedding()
+  )
+  (lm_head): Linear(in_features=8, out_features=103424, bias=False)
+)
+```

chat_template.jinja ADDED Viewed

	@@ -0,0 +1,102 @@

+{#- System turn, always emitted first. With no leading system message and no `system_settings`,
+    it contains only the global_setting block (think_mode=True). Otherwise a system_setting block wraps
+    `system_settings` and/or the first message's content (when that message has the system role),
+    followed by the same global_setting block. When `tools` is set, a tool_list JSON array with one
+    "function" entry per tool is appended. The final `set` creates the namespace that tracks the
+    index of the last user query (initialised to the last message index). -#}
+{{- '<|im_start|>system
+' }}{%- if messages[0].role != 'system' and not system_settings %}{{- '<global_setting>
+think_mode=True
+</global_setting>' }}{%- else%}{{- '<system_setting>
+' }}{{- system_settings + '
+' if system_settings else '' }}{{- (messages[0].content + '
+' if messages[0].role == 'system' else '') + '</system_setting>
+<global_setting>
+think_mode=True
+</global_setting>' }}{%- endif %}{%- if tools %}{{- "
+<tool_list>" }}{{- '
+' }}{{-'['}}{% for tool in tools %}{{'{"type": "function", "function": '}}{{-(tool.function | tojson)}}}{%-if not loop.last%},{%- endif %}{%endfor%}{{-']'}}{{- "
+</tool_list>" }}{%- endif %}{{-'<|im_end|>
+' }}{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
+{#- Walk the messages from newest to oldest and record the index of the most recent user
+    message whose content is a string that is not wrapped in tool_output tags. #}
+{%- for message in messages[::-1] %}
+    {#- Map the position in the reversed list back to an index into `messages`. #}
+    {%- set index = (messages|length - 1) - loop.index0 %}
+    {%- if ns.multi_step_tool and message.role == "user" and message.content is string and not(message.content.startswith('<tool_output>') and message.content.endswith('</tool_output>')) %}
+        {#- Clearing the flag keeps older messages from overwriting the recorded index. #}
+        {%- set ns.multi_step_tool = false %}
+        {%- set ns.last_query_index = index %}
+    {%- endif %}
+{%- endfor %}
+{#- Main loop: render each message as a chat turn according to its role. #}
+{%- for message in messages %}
+    {#- Any non-string content is treated as an empty string. #}
+    {%- if message.content is string %}
+        {%- set content = message.content %}
+    {%- else %}
+        {%- set content = '' %}
+    {%- endif %}
+    {#- User turns, and system turns that are not the first message, are emitted verbatim. #}
+    {%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
+        {{- '<|im_start|>' + message.role + '
+' + content + '<|im_end|>' + '
+' }}
+    {%- elif message.role == "assistant" %}
+        {#- Reasoning comes from `message.thoughts` when it is a string; otherwise it is split out
+            of a think span embedded in the content, and the content keeps only what follows it. #}
+        {%- set reasoning_content = '' %}
+        {%- if message.thoughts is string %}
+            {%- set reasoning_content = message.thoughts %}
+        {%- else %}
+            {%- if '</think>' in content %}
+                {%- set reasoning_content = content.split('</think>')[0].rstrip('
+').split('<think>')[-1].lstrip('
+') %}
+                {%- set content = content.split('</think>')[-1].lstrip('
+') %}
+            {%- endif %}
+        {%- endif %}
+        {#- The think block is rendered only for assistant turns after the last user query, and only
+            for the final message or when reasoning text is present; other turns omit it. #}
+        {%- if loop.index0 > ns.last_query_index and  (loop.last or (not loop.last and reasoning_content)) %} {{- '<|im_start|>' + message.role + '
+<think>
+' + reasoning_content.strip('
+') + '
+</think>
+' }} {%- else %} {{- '<|im_start|>' + message.role + '
+' }} {%- endif %}  {%- if content|length > 0 %}  {{- '<response>
+' + content + '
+</response>
+' }}  {%- endif %} {%- if message.tool_calls %}
+            {#- Each tool call becomes a tool_call JSON object; string arguments are inserted
+                as-is, any other value is serialized with tojson. #}
+            {%- for tool_call in message.tool_calls %}
+                {%- if (loop.first and content) or (not loop.first) %}
+                    {{- '
+' }}
+                {%- endif %}
+                {%- if tool_call.function %}
+                    {%- set tool_call = tool_call.function %}
+                {%- endif %}
+                {{- '
+<tool_call>
+{"name": "' }}
+                {{- tool_call.name }}
+                {{- '", "arguments": ' }}
+                {%- if tool_call.arguments is string %}
+                    {{- tool_call.arguments }}
+                {%- else %}
+                    {{- tool_call.arguments | tojson }}
+                {%- endif %}
+                {{- '}
+</tool_call>
+' }}
+            {%- endfor %}
+        {%- endif %}
+        {{- '<|im_end|>
+' }}
+    {%- elif message.role == "tool" %}
+        {#- Consecutive tool messages share one `tool` turn: the header is opened only for the
+            first message of a run and the turn is closed after the last one. #}
+        {%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
+            {{- '<|im_start|>tool' }}
+        {%- endif %}
+        {{- '
+<tool_output>' }}
+        {{- message.content|tojson }}
+        {{- '</tool_output>' }}
+        {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
+            {{- '<|im_end|>
+' }}
+        {%- endif %}
+    {%- endif %}
+{%- endfor %}
+ {#- Open a new assistant turn that starts inside a think block. This is emitted unconditionally;
+     NOTE(review): no `add_generation_prompt` check is visible in this template - confirm intended. #}
+ {{- "<|im_start|>assistant
+<think>
+"}}

config.json ADDED Viewed

	@@ -0,0 +1,43 @@

+{
+  "architectures": [
+    "Ernie4_5_MoeForCausalLM"
+  ],
+  "bos_token_id": 1,
+  "dtype": "bfloat16",
+  "eos_token_id": 2,
+  "head_dim": 32,
+  "hidden_act": "silu",
+  "hidden_size": 8,
+  "initializer_range": 0.02,
+  "intermediate_size": 32,
+  "max_position_embeddings": 131072,
+  "model_type": "ernie4_5_moe",
+  "moe_capacity": [
+    64,
+    64,
+    64
+  ],
+  "moe_gate": "top2_fused",
+  "moe_intermediate_size": 32,
+  "moe_k": 6,
+  "moe_layer_end_index": 1,
+  "moe_layer_interval": 1,
+  "moe_layer_start_index": 1,
+  "moe_norm_min": 1e-12,
+  "moe_num_experts": 64,
+  "moe_num_shared_experts": 2,
+  "moe_use_aux_free": true,
+  "num_attention_heads": 16,
+  "num_hidden_layers": 2,
+  "num_key_value_heads": 8,
+  "output_router_logits": false,
+  "pad_token_id": 0,
+  "rms_norm_eps": 1e-05,
+  "rope_scaling": null,
+  "rope_theta": 500000,
+  "router_aux_loss_coef": 0.001,
+  "transformers_version": "4.57.0.dev0",
+  "use_bias": false,
+  "use_cache": true,
+  "vocab_size": 103424
+}

generation_config.json ADDED Viewed

	@@ -0,0 +1,12 @@

+{
+  "bos_token_id": 1,
+  "do_sample": true,
+  "eos_token_id": 2,
+  "frequency_penalty": 0.0,
+  "pad_token_id": 0,
+  "presence_penalty": 0.0,
+  "temperature": 0.6,
+  "top_p": 0.95,
+  "transformers_version": "4.57.0.dev0",
+  "trust_remote_code": true
+}

model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2ab3ac0ffb09176ac4483830939e7b8e2638ae2b85d5de59fe516afd8b4edec8
+size 1833328

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,51 @@

+{
+  "bos_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "cls_token": {
+    "content": "<|begin_of_sentence|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "mask_token": {
+    "content": "<mask:1>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "sep_token": {
+    "content": "<|end_of_sentence|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

tokenizer.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:93a291ef6e5c89e6c0f2d5d004734ff429faa8ab925e39d71d2374b3d0daf49e
+size 10999541

tokenizer.model ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:098d756440604e3829c6c2375f835a82a1968c044b74e561f4b0084e53befd2e
+size 1614702

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,281 @@

+{
+  "add_bos_token": true,
+  "add_eos_token": false,
+  "add_prefix_space": null,
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "3": {
+      "content": "0",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "4": {
+      "content": "1",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "5": {
+      "content": "2",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "6": {
+      "content": "3",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "7": {
+      "content": "4",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "8": {
+      "content": "5",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "9": {
+      "content": "6",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "10": {
+      "content": "7",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "11": {
+      "content": "8",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "12": {
+      "content": "9",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "100272": {
+      "content": "<|end_of_sentence|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100273": {
+      "content": "<|begin_of_sentence|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100274": {
+      "content": "<mask:1>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100281": {
+      "content": "<think>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "100282": {
+      "content": "</think>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "100295": {
+      "content": "<tool_output>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "100296": {
+      "content": "</tool_output>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "100297": {
+      "content": "<tool_call>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "100298": {
+      "content": "</tool_call>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "100299": {
+      "content": "<response>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "100300": {
+      "content": "</response>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "100301": {
+      "content": "<system_setting>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "100302": {
+      "content": "</system_setting>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "100303": {
+      "content": "<global_setting>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "100304": {
+      "content": "</global_setting>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "100305": {
+      "content": "<tool_list>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "100306": {
+      "content": "</tool_list>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "100307": {
+      "content": "<|im_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "100308": {
+      "content": "<|im_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    }
+  },
+  "additional_special_tokens": [],
+  "bos_token": "<s>",
+  "clean_up_tokenization_spaces": false,
+  "cls_token": "<|begin_of_sentence|>",
+  "eos_token": "</s>",
+  "extra_special_tokens": {},
+  "header_end_token": "<mask:7>",
+  "header_start_token": "<mask:6>",
+  "legacy": true,
+  "mask_token": "<mask:1>",
+  "model_max_length": 1000000000000000019884624838656,
+  "pad_token": "<unk>",
+  "sep_token": "<|end_of_sentence|>",
+  "sys_end_token": "<mask:5>",
+  "sys_start_token": "<mask:4>",
+  "tokenizer_class": "LlamaTokenizer",
+  "unk_token": "<unk>",
+  "use_default_system_prompt": false
+}