#!/usr/bin/env python3
"""
Data validation schemas for Pokemon data.

This module provides JSON schemas and validation functions for Pokemon data
downloaded from the PokeAPI. It ensures data consistency and catches errors
early in the data processing pipeline.
"""
|
|
|
|
import json
|
|
import jsonschema
|
|
from typing import Dict, Any, List
|
|
from pathlib import Path
|
|
|
|
# Names of the six base stats, in the order the schema lists them.  Both the
# "properties" and the "required" entries below are derived from this tuple
# so the two can never drift apart.
_BASE_STAT_NAMES = (
    "hp",
    "attack",
    "defense",
    "special_attack",
    "special_defense",
    "speed",
)

# JSON Schema for Pokemon base stats: every stat is a required integer in the
# range 0-255 and no other keys are accepted.
POKEMON_STATS_SCHEMA = {
    "type": "object",
    "properties": {
        # A fresh dict per stat, so mutating one entry never affects another.
        stat_name: {"type": "integer", "minimum": 0, "maximum": 255}
        for stat_name in _BASE_STAT_NAMES
    },
    "required": list(_BASE_STAT_NAMES),
    "additionalProperties": False,
}
|
|
|
|
# JSON Schema for individual Pokemon data.  Every listed property is required
# and unknown keys are rejected.
POKEMON_SCHEMA = {
    "type": "object",
    "properties": {
        # NOTE(review): 1010 is described as the "current max Pokemon ID";
        # confirm whether newer IDs need to be accepted before relying on it.
        "id": {"type": "integer", "minimum": 1, "maximum": 1010},
        "name": {"type": "string", "minLength": 1},
        # One or two distinct type names.
        "types": {
            "type": "array",
            "items": {"type": "string"},
            "minItems": 1,
            "maxItems": 2,
            "uniqueItems": True,
        },
        "base_stats": POKEMON_STATS_SCHEMA,
        # At least one ability name, no duplicates.
        "abilities": {
            "type": "array",
            "items": {"type": "string"},
            "minItems": 1,
            "uniqueItems": True,
        },
        # Moves are referenced by positive integer ID; the list may be empty.
        "moves": {
            "type": "array",
            "items": {"type": "integer", "minimum": 1},
            "uniqueItems": True,
        },
        "weight": {"type": "integer", "minimum": 0},
        "height": {"type": "integer", "minimum": 0},
        "base_experience": {"type": "integer", "minimum": 0},
    },
    "required": [
        "id",
        "name",
        "types",
        "base_stats",
        "abilities",
        "moves",
        "weight",
        "height",
        "base_experience",
    ],
    "additionalProperties": False,
}
|
|
|
|
# JSON Schema for a Pokemon collection (multiple Pokemon): an object whose
# keys are numeric strings (Pokemon IDs) and whose values are individual
# Pokemon records.  Any key that is not purely digits is rejected.
POKEMON_COLLECTION_SCHEMA = {
    "type": "object",
    "patternProperties": {
        "^[0-9]+$": POKEMON_SCHEMA,
    },
    "additionalProperties": False,
}
|
|
|
|
# JSON Schema for individual move data.  Every listed property is required
# (several may be null) and unknown keys are rejected.
MOVE_SCHEMA = {
    "type": "object",
    "properties": {
        "id": {"type": "integer", "minimum": 1},
        "name": {"type": "string", "minLength": 1},
        "type": {"type": "string", "minLength": 1},
        # power / accuracy / effect fields are nullable: the key must be
        # present, but its value may be null.
        "power": {"type": ["integer", "null"], "minimum": 0, "maximum": 250},
        "accuracy": {"type": ["integer", "null"], "minimum": 0, "maximum": 100},
        "pp": {"type": "integer", "minimum": 0, "maximum": 64},
        "priority": {"type": "integer", "minimum": -7, "maximum": 5},
        "damage_class": {
            "type": "string",
            "enum": ["physical", "special", "status"],
        },
        "effect_id": {"type": ["integer", "null"], "minimum": 1},
        "effect_chance": {"type": ["integer", "null"], "minimum": 0, "maximum": 100},
        "target": {"type": "string", "minLength": 1},
        "description": {"type": "string"},
    },
    "required": [
        "id",
        "name",
        "type",
        "power",
        "accuracy",
        "pp",
        "priority",
        "damage_class",
        "effect_id",
        "effect_chance",
        "target",
        "description",
    ],
    "additionalProperties": False,
}
|
|
|
|
# JSON Schema for a move collection: an object whose keys are numeric strings
# (move IDs) and whose values are individual move records.  Any key that is
# not purely digits is rejected.
MOVE_COLLECTION_SCHEMA = {
    "type": "object",
    "patternProperties": {
        "^[0-9]+$": MOVE_SCHEMA,
    },
    "additionalProperties": False,
}
|
|
|
|
# JSON Schema for a single type effectiveness entry: one attacking type, one
# defending type and the damage multiplier that applies between them.
TYPE_EFFECTIVENESS_ENTRY_SCHEMA = {
    "type": "object",
    "properties": {
        "attacking_type": {"type": "string", "minLength": 1},
        "defending_type": {"type": "string", "minLength": 1},
        "damage_factor": {
            "type": "number",
            # Only these four damage multipliers are accepted.
            "enum": [0.0, 0.5, 1.0, 2.0],
        },
    },
    "required": ["attacking_type", "defending_type", "damage_factor"],
    "additionalProperties": False,
}
|
|
|
|
# JSON Schema for the type effectiveness collection: an array of entries in
# which no two entries may be identical.
TYPE_EFFECTIVENESS_SCHEMA = {
    "type": "array",
    "items": TYPE_EFFECTIVENESS_ENTRY_SCHEMA,
    "uniqueItems": True,
}
|
|
|
|
# Valid Generation 1 types, used for the additional (non-schema) checks on
# Pokemon types, move types and type-effectiveness entries.
GEN1_TYPES = {
    'normal',
    'fire',
    'water',
    'electric',
    'grass',
    'ice',
    'fighting',
    'poison',
    'ground',
    'flying',
    'psychic',
    'bug',
    'rock',
    'ghost',
    'dragon',
}
|
|
|
|
|
|
class DataValidator:
    """Validator class for Pokemon data using JSON schemas.

    Every ``validate_*`` method returns a list of human-readable error
    messages; an empty list means the data passed both the JSON-schema check
    and the additional business rules.  Structurally malformed input (wrong
    container type, unhashable values, non-numeric stats, ...) is reported
    through that list rather than raised, so a single bad record cannot abort
    a whole validation run.
    """

    def __init__(self):
        """Initialize the validator with one Draft 7 validator per schema."""
        self.pokemon_validator = jsonschema.Draft7Validator(POKEMON_SCHEMA)
        self.pokemon_collection_validator = jsonschema.Draft7Validator(POKEMON_COLLECTION_SCHEMA)
        self.move_validator = jsonschema.Draft7Validator(MOVE_SCHEMA)
        self.move_collection_validator = jsonschema.Draft7Validator(MOVE_COLLECTION_SCHEMA)
        self.type_effectiveness_validator = jsonschema.Draft7Validator(TYPE_EFFECTIVENESS_SCHEMA)

    @staticmethod
    def _is_gen1_type(value: Any) -> bool:
        """Return True if *value* is one of the Generation 1 type names.

        Unhashable values (lists, dicts) cannot be looked up in a set; they
        are treated as "not a valid type" instead of raising ``TypeError``.
        """
        try:
            return value in GEN1_TYPES
        except TypeError:
            return False

    def validate_pokemon(self, pokemon_data: Dict[str, Any]) -> List[str]:
        """
        Validate a single Pokemon data entry.

        Args:
            pokemon_data: Dictionary containing Pokemon data

        Returns:
            List of validation error messages (empty if valid)
        """
        # Schema validation
        errors = [
            f"Schema error: {error.message}"
            for error in self.pokemon_validator.iter_errors(pokemon_data)
        ]

        # The business rules below look keys up in the record.  A non-object
        # record has already been reported by the schema check above.
        if not isinstance(pokemon_data, dict):
            return errors

        # Additional business logic validation.  Only a real list is walked;
        # any other container is already flagged by the schema.
        types = pokemon_data.get('types')
        if isinstance(types, list):
            for ptype in types:
                if not self._is_gen1_type(ptype):
                    errors.append(f"Invalid type '{ptype}' - not a Generation 1 type")

        # Validate stat totals are reasonable
        stats = pokemon_data.get('base_stats')
        if isinstance(stats, dict):
            try:
                total_stats = sum(stats.values())
            except TypeError:
                # A non-numeric stat value: the schema check reports it, and
                # no meaningful total can be computed.
                total_stats = None

            if total_stats is not None:
                if total_stats < 180:  # Minimum reasonable total (like Sunkern)
                    errors.append(f"Base stat total {total_stats} seems too low")
                elif total_stats > 720:  # Maximum reasonable total (like Arceus)
                    errors.append(f"Base stat total {total_stats} seems too high")

        return errors

    def validate_pokemon_collection(self, collection_data: Dict[str, Any]) -> List[str]:
        """
        Validate a collection of Pokemon data.

        Collection-level messages cover only problems with the collection
        itself (wrong container type, non-numeric keys).  Problems inside an
        individual record are reported once, prefixed with the record's ID.

        Args:
            collection_data: Dictionary mapping Pokemon IDs to Pokemon data

        Returns:
            List of validation error messages (empty if valid)
        """
        # Schema validation for the collection structure.  Errors located
        # inside a record (non-empty path) are skipped here because the
        # per-record validation below reports the same violations with the
        # Pokemon ID attached; keeping both would list each problem twice.
        errors = [
            f"Collection schema error: {error.message}"
            for error in self.pokemon_collection_validator.iter_errors(collection_data)
            if not error.absolute_path
        ]

        # Nothing to iterate if the top level is not an object.
        if not isinstance(collection_data, dict):
            return errors

        # Validate each Pokemon individually
        for pokemon_id, pokemon_data in collection_data.items():
            errors.extend(
                f"Pokemon {pokemon_id}: {error}"
                for error in self.validate_pokemon(pokemon_data)
            )

        return errors

    def validate_move(self, move_data: Dict[str, Any]) -> List[str]:
        """
        Validate a single move data entry.

        Args:
            move_data: Dictionary containing move data

        Returns:
            List of validation error messages (empty if valid)
        """
        # Schema validation
        errors = [
            f"Schema error: {error.message}"
            for error in self.move_validator.iter_errors(move_data)
        ]

        # A non-object record has already been reported by the schema check.
        if not isinstance(move_data, dict):
            return errors

        # Additional business logic validation
        if 'type' in move_data and not self._is_gen1_type(move_data['type']):
            errors.append(f"Invalid move type '{move_data['type']}' - not a Generation 1 type")

        # Validate power/accuracy combinations make sense
        if 'power' in move_data and 'damage_class' in move_data:
            power = move_data['power']
            damage_class = move_data['damage_class']

            if damage_class == 'status' and power is not None:
                errors.append("Status moves should not have power")
            elif damage_class in ('physical', 'special') and power is None:
                errors.append("Damaging moves should have power")

        return errors

    def validate_move_collection(self, collection_data: Dict[str, Any]) -> List[str]:
        """
        Validate a collection of move data.

        Collection-level messages cover only problems with the collection
        itself (wrong container type, non-numeric keys).  Problems inside an
        individual record are reported once, prefixed with the record's ID.

        Args:
            collection_data: Dictionary mapping move IDs to move data

        Returns:
            List of validation error messages (empty if valid)
        """
        # Schema validation for the collection structure.  Errors inside a
        # record are skipped here; the per-record validation below reports
        # them with the move ID attached, so each problem is listed once.
        errors = [
            f"Collection schema error: {error.message}"
            for error in self.move_collection_validator.iter_errors(collection_data)
            if not error.absolute_path
        ]

        # Nothing to iterate if the top level is not an object.
        if not isinstance(collection_data, dict):
            return errors

        # Validate each move individually
        for move_id, move_data in collection_data.items():
            errors.extend(
                f"Move {move_id}: {error}"
                for error in self.validate_move(move_data)
            )

        return errors

    def validate_type_effectiveness(self, effectiveness_data: List[Dict[str, Any]]) -> List[str]:
        """
        Validate type effectiveness data.

        Args:
            effectiveness_data: List of type effectiveness entries

        Returns:
            List of validation error messages (empty if valid)
        """
        # Schema validation
        errors = [
            f"Schema error: {error.message}"
            for error in self.type_effectiveness_validator.iter_errors(effectiveness_data)
        ]

        # A non-array top level has already been reported by the schema.
        if not isinstance(effectiveness_data, list):
            return errors

        # Additional validation
        for i, entry in enumerate(effectiveness_data):
            # Non-object entries are flagged by the schema and have no keys
            # to inspect.
            if not isinstance(entry, dict):
                continue

            for role in ('attacking', 'defending'):
                key = f"{role}_type"
                if key in entry and not self._is_gen1_type(entry[key]):
                    errors.append(f"Entry {i}: Invalid {role} type '{entry[key]}'")

        return errors

    def validate_file(self, file_path: Path, data_type: str) -> List[str]:
        """
        Validate a JSON data file.

        Args:
            file_path: Path to the JSON file
            data_type: Type of data ('pokemon', 'moves', 'types')

        Returns:
            List of validation error messages (empty if valid).  A file that
            cannot be read or parsed yields a single "Failed to load file"
            message; an unrecognised data_type yields a single
            "Unknown data type" message.
        """
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                data = json.load(f)
        except (OSError, ValueError) as e:
            # OSError covers missing files, directories and permission
            # problems; ValueError covers both malformed JSON
            # (json.JSONDecodeError) and bytes that are not valid UTF-8
            # (UnicodeDecodeError).
            return [f"Failed to load file {file_path}: {e}"]

        handlers = {
            'pokemon': self.validate_pokemon_collection,
            'moves': self.validate_move_collection,
            'types': self.validate_type_effectiveness,
        }
        handler = handlers.get(data_type)
        if handler is None:
            return [f"Unknown data type: {data_type}"]
        return handler(data)
|
|
|
|
|
|
def save_schemas_to_files(output_dir: Path) -> None:
    """Save JSON schemas to files for external use.

    Writes the Pokemon collection, move collection and type effectiveness
    schemas as indented JSON files into ``output_dir`` and prints the path of
    each file written.

    Args:
        output_dir: Directory to write the schema files into.  It is created,
            together with any missing parent directories, if it does not
            exist yet.
    """
    output_dir = Path(output_dir)
    # parents=True: the target may be a nested path whose intermediate
    # directories do not exist yet.
    output_dir.mkdir(parents=True, exist_ok=True)

    schemas = {
        'pokemon.schema.json': POKEMON_COLLECTION_SCHEMA,
        'moves.schema.json': MOVE_COLLECTION_SCHEMA,
        'type_effectiveness.schema.json': TYPE_EFFECTIVENESS_SCHEMA,
    }

    for filename, schema in schemas.items():
        schema_file = output_dir / filename
        with open(schema_file, 'w', encoding='utf-8') as f:
            json.dump(schema, f, indent=2)
        print(f"Saved schema to {schema_file}")
|
|
|
|
|
|
if __name__ == "__main__":
    # Save schemas to the data directory.
    # NOTE(review): the path is relative to the current working directory,
    # not to this file - confirm the script is always run from its own folder.
    save_schemas_to_files(Path("../../data/validation"))
|