| name: trocr_image2text | |
| config_type: model | |
| encoder: | |
| config_type: model | |
| hidden_size: 768 | |
| num_hidden_layers: 12 | |
| num_attention_heads: 12 | |
| intermediate_size: 3072 | |
| hidden_act: gelu | |
| hidden_dropout_prob: 0.0 | |
| attention_probs_dropout_prob: 0.0 | |
| initializer_range: 0.02 | |
| layer_norm_eps: 1.0e-12 | |
| image_size: 224 | |
| patch_size: 16 | |
| num_channels: 3 | |
| qkv_bias: true | |
| encoder_stride: 16 | |
| decoder: | |
| config_type: model | |
| is_decoder: true | |
| add_cross_attention: true | |
| attention_probs_dropout_prob: 0.1 | |
| bos_token_id: 0 | |
| eos_token_id: 2 | |
| classifier_dropout: null | |
| gradient_checkpointing: false | |
| hidden_act: gelu | |
| hidden_dropout_prob: 0.1 | |
| hidden_size: 768 | |
| initializer_range: 0.02 | |
| intermediate_size: 3072 | |
| layer_norm_eps: 1.0e-12 | |
| max_position_embeddings: 514 | |
| num_attention_heads: 12 | |
| num_hidden_layers: 12 | |
| pad_token_id: 1 | |
| position_embedding_type: absolute | |
| type_vocab_size: 1 | |
| use_cache: true | |
| vocab_size: 42000 | |
| generation: | |
| bos_token_id: 0 | |
| decoder_start_token_id: 0 | |
| early_stopping: true | |
| eos_token_id: 2 | |
| length_penalty: 2.0 | |
| max_length: 64 | |
| no_repeat_ngram_size: 3 | |
| num_beams: 4 | |
| pad_token_id: 1 | |