# Settings for the model endpoint that the benchmark talks to.
model:
  # Loopback address, port 8000.
  base_url: 'http://127.0.0.1:8000'
  # Generation parameters.
  max_tokens: 512
  temperature: 0.1
  # NOTE(review): the unit is not stated in this file (presumably
  # seconds) - confirm against the code that reads it.
  timeout: 30

# Datasets available to the benchmark, keyed by dataset name.
datasets:
  benchmark_dataset:
    file_path: 'ner_benchmark_dataset.jsonl'
    # Use first 100 examples for quick benchmarking
    sample_size: 100
    # Field names looked up in each record (presumably keys of the JSONL
    # objects - verify against the loader).
    instruction_field: 'instruction'
    input_field: 'input'
    expected_output_field: 'response'

# Metric definitions: each entry has a display name, a description and a
# type identifier.
metrics:
  # Primary metrics for HuggingFace
  entity_recognition:
    name: 'Entity Recognition F1 Score'
    description: 'F1 score for named entity recognition accuracy'
    type: 'f1'

  precision:
    name: 'Precision Score'
    description: 'Precision for entity recognition'
    type: 'precision'

  recall:
    name: 'Recall Score'
    description: 'Recall for entity recognition'
    type: 'recall'

  latency:
    name: 'Average Latency'
    description: 'Average response time in milliseconds'
    type: 'latency'

  # Entity type specific performance
  # Each entity type lists its label spellings as keywords (presumably
  # matched against model output - confirm against the consumer).
  entity_types:
    person:
      name: 'Person Entity Recognition'
      keywords:
        - 'PERSON'
        - 'person'
        - 'Person'
    organization:
      name: 'Organization Entity Recognition'
      keywords:
        - 'ORG'
        - 'organization'
        - 'Organization'
    location:
      name: 'Location Entity Recognition'
      keywords:
        - 'LOC'
        - 'location'
        - 'Location'
    miscellaneous:
      name: 'Miscellaneous Entity Recognition'
      keywords:
        - 'MISC'
        - 'miscellaneous'
        - 'Miscellaneous'

# Where benchmark results are written and how many examples to include.
output:
  # Summary results file and the detailed JSON results file.
  results_file: 'benchmarks.txt'
  detailed_results_file: 'benchmark_results.json'
  # Example reporting toggle and its limit (presumably a cap on examples
  # written to the results - confirm against the writer).
  include_examples: true
  max_examples: 10