Files
pokemon-battle-engine/tools/data/schemas.py
2025-08-14 03:48:42 +00:00

334 lines
12 KiB
Python

#!/usr/bin/env python3
"""
Data validation schemas for Pokemon data.
This module provides JSON schemas and validation functions for Pokemon data
downloaded from the PokeAPI. It ensures data consistency and catches errors
early in the data processing pipeline.
"""
import json
import jsonschema
from typing import Dict, Any, List
from pathlib import Path
# Schema for a Pokemon's base stats: six required integer stats, each limited
# to the range 0-255, with no other keys accepted.
POKEMON_STATS_SCHEMA = {
    "type": "object",
    "properties": {
        stat_name: {"type": "integer", "minimum": 0, "maximum": 255}
        for stat_name in (
            "hp",
            "attack",
            "defense",
            "special_attack",
            "special_defense",
            "speed",
        )
    },
    "required": [
        "hp",
        "attack",
        "defense",
        "special_attack",
        "special_defense",
        "speed",
    ],
    "additionalProperties": False,
}

# Schema for a single Pokemon record. Every listed field is required and
# unknown fields are rejected.
POKEMON_SCHEMA = {
    "type": "object",
    "properties": {
        # Upper bound was chosen as the highest Pokemon ID at the time of
        # writing. NOTE(review): confirm it is still current.
        "id": {"type": "integer", "minimum": 1, "maximum": 1010},
        "name": {"type": "string", "minLength": 1},
        # One or two distinct type names.
        "types": {
            "type": "array",
            "items": {"type": "string"},
            "minItems": 1,
            "maxItems": 2,
            "uniqueItems": True,
        },
        "base_stats": POKEMON_STATS_SCHEMA,
        # At least one ability name, no duplicates.
        "abilities": {
            "type": "array",
            "items": {"type": "string"},
            "minItems": 1,
            "uniqueItems": True,
        },
        # Positive integer move IDs, no duplicates; may be empty.
        "moves": {
            "type": "array",
            "items": {"type": "integer", "minimum": 1},
            "uniqueItems": True,
        },
        "weight": {"type": "integer", "minimum": 0},
        "height": {"type": "integer", "minimum": 0},
        "base_experience": {"type": "integer", "minimum": 0},
    },
    "required": [
        "id",
        "name",
        "types",
        "base_stats",
        "abilities",
        "moves",
        "weight",
        "height",
        "base_experience",
    ],
    "additionalProperties": False,
}

# Schema for a collection of Pokemon: an object whose keys are numeric strings
# (Pokemon IDs) and whose values are individual Pokemon records.
POKEMON_COLLECTION_SCHEMA = {
    "type": "object",
    "patternProperties": {
        "^[0-9]+$": POKEMON_SCHEMA,
    },
    "additionalProperties": False,
}
# Schema for a single move record. Every listed field is required (nullable
# ones must still be present, possibly as null) and unknown fields are
# rejected.
MOVE_SCHEMA = {
    "type": "object",
    "properties": {
        "id": {"type": "integer", "minimum": 1},
        "name": {"type": "string", "minLength": 1},
        "type": {"type": "string", "minLength": 1},
        # Nullable numeric fields: the bounds only constrain integer values.
        "power": {"type": ["integer", "null"], "minimum": 0, "maximum": 250},
        "accuracy": {"type": ["integer", "null"], "minimum": 0, "maximum": 100},
        "pp": {"type": "integer", "minimum": 0, "maximum": 64},
        "priority": {"type": "integer", "minimum": -7, "maximum": 5},
        "damage_class": {
            "type": "string",
            "enum": ["physical", "special", "status"],
        },
        "effect_id": {"type": ["integer", "null"], "minimum": 1},
        "effect_chance": {
            "type": ["integer", "null"],
            "minimum": 0,
            "maximum": 100,
        },
        "target": {"type": "string", "minLength": 1},
        "description": {"type": "string"},
    },
    "required": [
        "id",
        "name",
        "type",
        "power",
        "accuracy",
        "pp",
        "priority",
        "damage_class",
        "effect_id",
        "effect_chance",
        "target",
        "description",
    ],
    "additionalProperties": False,
}

# Schema for a collection of moves: an object whose keys are numeric strings
# (move IDs) and whose values are individual move records.
MOVE_COLLECTION_SCHEMA = {
    "type": "object",
    "patternProperties": {
        "^[0-9]+$": MOVE_SCHEMA,
    },
    "additionalProperties": False,
}
# Schema for one type-effectiveness entry: an attacking type, a defending
# type and the damage multiplier that applies between them.
TYPE_EFFECTIVENESS_ENTRY_SCHEMA = {
    "type": "object",
    "properties": {
        "attacking_type": {"type": "string", "minLength": 1},
        "defending_type": {"type": "string", "minLength": 1},
        "damage_factor": {
            "type": "number",
            # Only valid damage multipliers
            "enum": [0.0, 0.5, 1.0, 2.0],
        },
    },
    "required": ["attacking_type", "defending_type", "damage_factor"],
    "additionalProperties": False,
}

# Schema for the full type-effectiveness table: an array of entries with no
# duplicate entries allowed.
TYPE_EFFECTIVENESS_SCHEMA = {
    "type": "array",
    "items": TYPE_EFFECTIVENESS_ENTRY_SCHEMA,
    "uniqueItems": True,
}
# Names of the Generation 1 types. Used for the extra (non-schema) checks on
# Pokemon types, move types and type-effectiveness entries.
GEN1_TYPES = {
    "normal",
    "fire",
    "water",
    "electric",
    "grass",
    "ice",
    "fighting",
    "poison",
    "ground",
    "flying",
    "psychic",
    "bug",
    "rock",
    "ghost",
    "dragon",
}
class DataValidator:
    """Validator class for Pokemon data using JSON schemas.

    Every ``validate_*`` method returns a list of human-readable error
    messages; an empty list means the data passed all checks. Structural
    problems are reported by the Draft 7 validators created in ``__init__``.
    On top of the schemas a few extra checks are applied: type names must be
    in ``GEN1_TYPES``, base stat totals must fall in a plausible range, and a
    move's power must agree with its damage class.

    The extra checks never raise on malformed input. Values whose shape the
    schema already rejects (non-dict records, non-list type arrays,
    non-numeric stats, ...) are skipped by the extra checks and left to the
    schema error messages.
    """

    def __init__(self):
        """Initialize the validator with compiled schemas."""
        self.pokemon_validator = jsonschema.Draft7Validator(POKEMON_SCHEMA)
        self.pokemon_collection_validator = jsonschema.Draft7Validator(POKEMON_COLLECTION_SCHEMA)
        self.move_validator = jsonschema.Draft7Validator(MOVE_SCHEMA)
        self.move_collection_validator = jsonschema.Draft7Validator(MOVE_COLLECTION_SCHEMA)
        self.type_effectiveness_validator = jsonschema.Draft7Validator(TYPE_EFFECTIVENESS_SCHEMA)

    @staticmethod
    def _is_gen1_type(value: Any) -> bool:
        """Return True when *value* is a string naming a Generation 1 type."""
        # The isinstance test runs first so that unhashable values (lists,
        # dicts) never reach the set lookup, which would raise TypeError.
        return isinstance(value, str) and value in GEN1_TYPES

    @staticmethod
    def _collection_level_errors(validator: Any, collection_data: Any) -> List[str]:
        """Return schema errors that concern the collection itself.

        Errors located inside an individual entry (non-empty error path) are
        dropped here because the per-entry validation reports the very same
        violations with the entry ID attached; keeping both would list every
        problem twice.
        """
        return [
            f"Collection schema error: {error.message}"
            for error in validator.iter_errors(collection_data)
            if not error.absolute_path
        ]

    def validate_pokemon(self, pokemon_data: Dict[str, Any]) -> List[str]:
        """
        Validate a single Pokemon data entry.

        Args:
            pokemon_data: Dictionary containing Pokemon data

        Returns:
            List of validation error messages (empty if valid)
        """
        # Schema validation
        errors = [
            f"Schema error: {error.message}"
            for error in self.pokemon_validator.iter_errors(pokemon_data)
        ]

        # The extra checks below need a mapping; anything else has already
        # been reported by the schema.
        if not isinstance(pokemon_data, dict):
            return errors

        # Additional business logic validation
        types = pokemon_data.get('types')
        if isinstance(types, (list, tuple)):
            for ptype in types:
                if not self._is_gen1_type(ptype):
                    errors.append(f"Invalid type '{ptype}' - not a Generation 1 type")

        # Validate stat totals are reasonable. Skipped when any stat is not a
        # number, since the total would be meaningless (and sum() would raise).
        stats = pokemon_data.get('base_stats')
        if isinstance(stats, dict):
            stat_values = list(stats.values())
            if all(isinstance(value, (int, float)) for value in stat_values):
                total_stats = sum(stat_values)
                if total_stats < 180:  # Minimum reasonable total (like Sunkern)
                    errors.append(f"Base stat total {total_stats} seems too low")
                elif total_stats > 720:  # Maximum reasonable total (like Arceus)
                    errors.append(f"Base stat total {total_stats} seems too high")

        return errors

    def validate_pokemon_collection(self, collection_data: Dict[str, Any]) -> List[str]:
        """
        Validate a collection of Pokemon data.

        Args:
            collection_data: Dictionary mapping Pokemon IDs to Pokemon data

        Returns:
            List of validation error messages (empty if valid)
        """
        # Schema validation for the collection structure
        errors = self._collection_level_errors(
            self.pokemon_collection_validator, collection_data
        )

        # A non-dict collection has no entries to walk; the schema error
        # above already explains the problem.
        if not isinstance(collection_data, dict):
            return errors

        # Validate each Pokemon individually
        for pokemon_id, pokemon_data in collection_data.items():
            errors.extend(
                f"Pokemon {pokemon_id}: {error}"
                for error in self.validate_pokemon(pokemon_data)
            )

        return errors

    def validate_move(self, move_data: Dict[str, Any]) -> List[str]:
        """
        Validate a single move data entry.

        Args:
            move_data: Dictionary containing move data

        Returns:
            List of validation error messages (empty if valid)
        """
        # Schema validation
        errors = [
            f"Schema error: {error.message}"
            for error in self.move_validator.iter_errors(move_data)
        ]

        # The extra checks below need a mapping; anything else has already
        # been reported by the schema.
        if not isinstance(move_data, dict):
            return errors

        # Additional business logic validation
        if 'type' in move_data and not self._is_gen1_type(move_data['type']):
            errors.append(f"Invalid move type '{move_data['type']}' - not a Generation 1 type")

        # Validate power/accuracy combinations make sense
        if 'power' in move_data and 'damage_class' in move_data:
            power = move_data['power']
            damage_class = move_data['damage_class']
            if damage_class == 'status' and power is not None:
                errors.append("Status moves should not have power")
            elif damage_class in ['physical', 'special'] and power is None:
                errors.append("Damaging moves should have power")

        return errors

    def validate_move_collection(self, collection_data: Dict[str, Any]) -> List[str]:
        """
        Validate a collection of move data.

        Args:
            collection_data: Dictionary mapping move IDs to move data

        Returns:
            List of validation error messages (empty if valid)
        """
        # Schema validation for the collection structure
        errors = self._collection_level_errors(
            self.move_collection_validator, collection_data
        )

        # A non-dict collection has no entries to walk; the schema error
        # above already explains the problem.
        if not isinstance(collection_data, dict):
            return errors

        # Validate each move individually
        for move_id, move_data in collection_data.items():
            errors.extend(
                f"Move {move_id}: {error}"
                for error in self.validate_move(move_data)
            )

        return errors

    def validate_type_effectiveness(self, effectiveness_data: List[Dict[str, Any]]) -> List[str]:
        """
        Validate type effectiveness data.

        Args:
            effectiveness_data: List of type effectiveness entries

        Returns:
            List of validation error messages (empty if valid)
        """
        # Schema validation
        errors = [
            f"Schema error: {error.message}"
            for error in self.type_effectiveness_validator.iter_errors(effectiveness_data)
        ]

        # Only a list has indexable entries to inspect.
        if not isinstance(effectiveness_data, list):
            return errors

        # Additional validation
        for i, entry in enumerate(effectiveness_data):
            if not isinstance(entry, dict):
                continue  # Already reported by the schema
            if 'attacking_type' in entry and not self._is_gen1_type(entry['attacking_type']):
                errors.append(f"Entry {i}: Invalid attacking type '{entry['attacking_type']}'")
            if 'defending_type' in entry and not self._is_gen1_type(entry['defending_type']):
                errors.append(f"Entry {i}: Invalid defending type '{entry['defending_type']}'")

        return errors

    def validate_file(self, file_path: Path, data_type: str) -> List[str]:
        """
        Validate a JSON data file.

        Args:
            file_path: Path to the JSON file
            data_type: Type of data ('pokemon', 'moves', 'types')

        Returns:
            List of validation error messages (empty if valid). A file that
            cannot be opened, decoded as UTF-8 or parsed as JSON yields a
            single "Failed to load file" message instead of raising.
        """
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                data = json.load(f)
        except (OSError, UnicodeDecodeError, json.JSONDecodeError) as e:
            # OSError covers a missing file, a directory path and permission
            # problems; UnicodeDecodeError covers files that are not UTF-8.
            return [f"Failed to load file {file_path}: {e}"]

        if data_type == 'pokemon':
            return self.validate_pokemon_collection(data)
        elif data_type == 'moves':
            return self.validate_move_collection(data)
        elif data_type == 'types':
            return self.validate_type_effectiveness(data)
        else:
            return [f"Unknown data type: {data_type}"]
def save_schemas_to_files(output_dir: Path) -> None:
    """Save JSON schemas to files for external use.

    Writes the Pokemon collection, move collection and type effectiveness
    schemas as indented JSON files into ``output_dir`` and prints the path of
    each file written.

    Args:
        output_dir: Directory to write the schema files into. It is created,
            together with any missing parent directories, if it does not
            exist yet.
    """
    output_dir = Path(output_dir)
    # parents=True: the target may be nested under directories that do not
    # exist yet; without it mkdir raises FileNotFoundError in that case.
    output_dir.mkdir(parents=True, exist_ok=True)

    schemas = {
        'pokemon.schema.json': POKEMON_COLLECTION_SCHEMA,
        'moves.schema.json': MOVE_COLLECTION_SCHEMA,
        'type_effectiveness.schema.json': TYPE_EFFECTIVENESS_SCHEMA,
    }

    for filename, schema in schemas.items():
        schema_file = output_dir / filename
        with open(schema_file, 'w', encoding='utf-8') as f:
            json.dump(schema, f, indent=2)
        print(f"Saved schema to {schema_file}")
if __name__ == "__main__":
    # Save schemas to the data directory. The target is two levels above this
    # script's own directory ("../../data/validation" relative to the script),
    # anchored to the script location so the result does not depend on the
    # directory the script is launched from.
    script_dir = Path(__file__).resolve().parent
    save_schemas_to_files(script_dir.parent.parent / "data" / "validation")