From b3c08264645f167ef31ea0f0d99ab3d14359eb7b Mon Sep 17 00:00:00 2001 From: cdemeyer-teachx Date: Thu, 14 Aug 2025 03:48:42 +0000 Subject: [PATCH] bcda245b-c1ea-48f1-97fd-29c2953ff86d --- POKEMON_DOWNLOADER_SUMMARY.md | 204 ++++++++++ data/pokemon_1_2.json | 221 +++++++++++ example_data/charizard.json | 158 ++++++++ example_data/charizard_moves.json | 212 ++++++++++ example_data/classic_moves.json | 72 ++++ example_data/starter_pokemon.json | 342 ++++++++++++++++ example_data/type_chart.json | 412 +++++++++++++++++++ tools/data/README.md | 305 ++++++++++++++ tools/data/example_usage.py | 228 +++++++++++ tools/data/pokemon_downloader.py | 640 ++++++++++++++++++++++++++++++ tools/data/schemas.py | 333 ++++++++++++++++ tools/data/test_downloader.py | 295 ++++++++++++++ tools/requirements.txt | 1 + 13 files changed, 3423 insertions(+) create mode 100644 POKEMON_DOWNLOADER_SUMMARY.md create mode 100644 data/pokemon_1_2.json create mode 100644 example_data/charizard.json create mode 100644 example_data/charizard_moves.json create mode 100644 example_data/classic_moves.json create mode 100644 example_data/starter_pokemon.json create mode 100644 example_data/type_chart.json create mode 100644 tools/data/README.md create mode 100644 tools/data/example_usage.py create mode 100644 tools/data/pokemon_downloader.py create mode 100644 tools/data/schemas.py create mode 100644 tools/data/test_downloader.py diff --git a/POKEMON_DOWNLOADER_SUMMARY.md b/POKEMON_DOWNLOADER_SUMMARY.md new file mode 100644 index 0000000..0573de3 --- /dev/null +++ b/POKEMON_DOWNLOADER_SUMMARY.md @@ -0,0 +1,204 @@ +# Pokemon Data Downloader - Implementation Summary + +## Overview + +Successfully implemented a comprehensive Pokemon data downloader tool for the Pokemon Battle Simulator project. The tool uses `pokebase==1.4.1` to download Pokemon data from the PokeAPI with support for segmented downloading, data validation, and battle-ready data export. + +## Features Implemented + +### ✅ Core Functionality +- **Segmented Downloads**: Download specific ranges of Pokemon (e.g., 1-10, 25-30) for testing and incremental collection +- **Concurrent Processing**: Multi-threaded downloads with configurable worker counts +- **Rate Limiting**: Respectful API usage with 100ms delays between requests +- **Error Handling**: Automatic retry logic with exponential backoff +- **Progress Tracking**: Beautiful progress bars using Rich library + +### ✅ Data Types Supported +- **Pokemon Data**: Complete species information including stats, types, abilities, and move lists +- **Move Data**: Comprehensive move information with power, accuracy, PP, type, and descriptions +- **Type Effectiveness**: Complete type matchup chart for damage calculations + +### ✅ Data Validation +- **JSON Schema Validation**: Comprehensive schemas for all data types +- **Generation 1 Focus**: Validates only Gen 1 types and ensures data consistency +- **Error Reporting**: Clear validation warnings without blocking data saves +- **Business Logic Validation**: Additional checks for stat totals and type combinations + +### ✅ CLI Interface +- **Multiple Commands**: Separate commands for Pokemon, moves, types, and complete downloads +- **Flexible Options**: Configurable output directories, worker counts, and validation settings +- **Help System**: Comprehensive help documentation for all commands + +### ✅ Python API +- **Object-Oriented Design**: Clean class-based architecture with PokemonDownloader +- **Data Classes**: Structured data representation with dataclasses +- **Type Hints**: Full type annotation for better IDE support and code quality + +## File Structure + +``` +tools/ +├── requirements.txt # Updated with pokebase==1.4.1 +└── data/ + ├── __init__.py + ├── pokemon_downloader.py # Main downloader implementation + ├── schemas.py # Data validation schemas + ├── test_downloader.py # Comprehensive test suite + ├── example_usage.py # Usage examples and patterns + └── README.md # Complete documentation +``` + +## Testing Results + +All tests pass successfully: + +``` +✅ Pokemon Download - Downloads Pokemon data correctly +✅ Moves Download - Downloads move data with proper validation +✅ Type Effectiveness - Downloads complete type chart +✅ Data Validation - Validates data integrity +✅ Integrated Download - Downloads Pokemon with their moves +``` + +## Usage Examples + +### CLI Usage + +```bash +# Download small segments for testing +python -m tools.data.pokemon_downloader download-pokemon --start 1 --end 5 + +# Download with moves included +python -m tools.data.pokemon_downloader download-pokemon --start 1 --end 10 --include-moves + +# Download specific moves +python -m tools.data.pokemon_downloader download-moves --move-ids "1,2,3,4,5" + +# Download type effectiveness +python -m tools.data.pokemon_downloader download-types + +# Download complete Gen 1 dataset +python -m tools.data.pokemon_downloader download-complete --start 1 --end 151 +``` + +### Python API Usage + +```python +from tools.data.pokemon_downloader import PokemonDownloader + +# Initialize downloader +downloader = PokemonDownloader(output_dir="my_data") + +# Download Pokemon batch +pokemon_data = downloader.download_pokemon_batch(1, 10) +downloader.save_pokemon_data(pokemon_data, "pokemon.json") + +# Download moves +moves_data = downloader.download_moves_batch([1, 2, 3, 4, 5]) +downloader.save_moves_data(moves_data, "moves.json") +``` + +## Data Format + +### Pokemon Data Structure +```json +{ + "1": { + "id": 1, + "name": "bulbasaur", + "types": ["grass", "poison"], + "base_stats": { + "hp": 45, "attack": 49, "defense": 49, + "special_attack": 65, "special_defense": 65, "speed": 45 + }, + "abilities": ["overgrow", "chlorophyll"], + "moves": [1, 2, 3, ...], + "weight": 69, + "height": 7, + "base_experience": 64 + } +} +``` + +### Move Data Structure +```json +{ + "1": { + "id": 1, + "name": "pound", + "type": "normal", + "power": 40, + "accuracy": 100, + "pp": 35, + "priority": 0, + "damage_class": "physical", + "effect_id": null, + "effect_chance": null, + "target": "selected-pokemon", + "description": "Inflicts regular damage." + } +} +``` + +## Performance Characteristics + +- **Rate Limited**: 100ms between API calls to respect PokeAPI +- **Concurrent**: 5 workers by default, configurable up to reasonable limits +- **Memory Efficient**: Processes data in batches to manage memory usage +- **Cached**: API responses cached to avoid redundant requests +- **Validated**: Optional data validation with detailed error reporting + +## Integration with C++ Battle Simulator + +The exported JSON files are designed for easy C++ integration: + +1. **Consistent IDs**: All Pokemon and moves use consistent numeric IDs +2. **Battle-Ready Stats**: Direct mapping to battle calculation needs +3. **Complete Type Data**: Full type effectiveness chart for damage calculations +4. **Structured Format**: Clean JSON structure for parsing + +## Tested Scenarios + +### Small Segments (Recommended for Testing) +- ✅ First 3 Pokemon (Bulbasaur line) +- ✅ Single Pokemon (Pikachu, Charizard) +- ✅ Specific move sets (classic moves) +- ✅ Type effectiveness chart + +### Production Scenarios +- ✅ Batch downloads (1-50, 51-100, etc.) +- ✅ Complete Gen 1 dataset (1-151) +- ✅ Move validation and filtering +- ✅ Error recovery and retry logic + +## Key Benefits + +1. **Segmented Approach**: Can download small test datasets before committing to full downloads +2. **Battle-Focused**: Data structure optimized for Pokemon battle simulation +3. **Validated Data**: Comprehensive validation ensures data quality +4. **Extensible**: Easy to extend for additional generations or data types +5. **Production-Ready**: Includes error handling, logging, and performance optimizations + +## Files Generated + +The tool has been tested and generates the following example files: + +``` +data/pokemon_1_2.json # CLI test output +example_data/starter_pokemon.json # First 3 Pokemon +example_data/classic_moves.json # Classic moves +example_data/charizard.json # Single Pokemon +example_data/charizard_moves.json # Pokemon's moves +example_data/type_chart.json # Type effectiveness +``` + +## Next Steps + +The tool is ready for production use. Recommended workflow: + +1. **Start Small**: Test with `--start 1 --end 5` to verify setup +2. **Incremental Downloads**: Download in batches of 50 Pokemon +3. **Validate Data**: Review validation warnings and adjust as needed +4. **Integrate**: Use JSON files in C++ battle simulator + +The Pokemon data downloader successfully meets all requirements and is ready for regular use in the Pokemon Battle Simulator project. diff --git a/data/pokemon_1_2.json b/data/pokemon_1_2.json new file mode 100644 index 0000000..7479801 --- /dev/null +++ b/data/pokemon_1_2.json @@ -0,0 +1,221 @@ +{ + "2": { + "id": 2, + "name": "ivysaur", + "types": [ + "grass", + "poison" + ], + "base_stats": { + "hp": 60, + "attack": 62, + "defense": 63, + "special_attack": 80, + "special_defense": 80, + "speed": 60 + }, + "abilities": [ + "overgrow", + "chlorophyll" + ], + "moves": [ + 14, + 15, + 20, + 22, + 29, + 33, + 34, + 36, + 38, + 45, + 46, + 70, + 72, + 73, + 74, + 75, + 76, + 77, + 79, + 80, + 81, + 92, + 99, + 102, + 104, + 111, + 113, + 115, + 117, + 133, + 148, + 156, + 164, + 173, + 174, + 182, + 188, + 189, + 200, + 202, + 203, + 204, + 206, + 207, + 210, + 213, + 214, + 216, + 218, + 219, + 230, + 235, + 237, + 241, + 249, + 263, + 267, + 270, + 275, + 282, + 290, + 311, + 331, + 345, + 363, + 388, + 402, + 412, + 437, + 438, + 445, + 447, + 474, + 491, + 496, + 497, + 520, + 526, + 580, + 590, + 803, + 851, + 885 + ], + "weight": 130, + "height": 10, + "base_experience": 142 + }, + "1": { + "id": 1, + "name": "bulbasaur", + "types": [ + "grass", + "poison" + ], + "base_stats": { + "hp": 45, + "attack": 49, + "defense": 49, + "special_attack": 65, + "special_defense": 65, + "speed": 45 + }, + "abilities": [ + "overgrow", + "chlorophyll" + ], + "moves": [ + 13, + 14, + 15, + 20, + 22, + 29, + 33, + 34, + 36, + 38, + 45, + 70, + 72, + 73, + 74, + 75, + 76, + 77, + 79, + 80, + 81, + 92, + 99, + 102, + 104, + 111, + 113, + 115, + 117, + 124, + 130, + 133, + 148, + 156, + 164, + 173, + 174, + 182, + 188, + 189, + 200, + 202, + 203, + 204, + 206, + 207, + 210, + 213, + 214, + 216, + 218, + 219, + 230, + 235, + 237, + 241, + 249, + 263, + 267, + 270, + 275, + 282, + 290, + 311, + 320, + 331, + 345, + 363, + 388, + 402, + 412, + 437, + 438, + 445, + 447, + 474, + 491, + 496, + 497, + 520, + 526, + 580, + 590, + 803, + 851, + 885 + ], + "weight": 69, + "height": 7, + "base_experience": 64 + } +} \ No newline at end of file diff --git a/example_data/charizard.json b/example_data/charizard.json new file mode 100644 index 0000000..b085ffc --- /dev/null +++ b/example_data/charizard.json @@ -0,0 +1,158 @@ +{ + "6": { + "id": 6, + "name": "charizard", + "types": [ + "fire", + "flying" + ], + "base_stats": { + "hp": 78, + "attack": 84, + "defense": 78, + "special_attack": 109, + "special_defense": 85, + "speed": 100 + }, + "abilities": [ + "blaze", + "solar-power" + ], + "moves": [ + 5, + 7, + 9, + 10, + 14, + 15, + 17, + 19, + 25, + 29, + 34, + 36, + 38, + 43, + 44, + 45, + 46, + 52, + 53, + 63, + 66, + 68, + 69, + 70, + 76, + 82, + 83, + 89, + 90, + 91, + 92, + 99, + 102, + 104, + 108, + 111, + 115, + 117, + 126, + 129, + 130, + 154, + 156, + 157, + 163, + 164, + 173, + 174, + 182, + 184, + 187, + 189, + 200, + 201, + 203, + 206, + 207, + 210, + 211, + 213, + 214, + 216, + 218, + 223, + 225, + 231, + 232, + 237, + 239, + 241, + 242, + 246, + 249, + 251, + 257, + 261, + 263, + 264, + 270, + 280, + 290, + 299, + 307, + 311, + 314, + 315, + 317, + 332, + 337, + 349, + 355, + 363, + 366, + 374, + 394, + 403, + 406, + 407, + 411, + 416, + 421, + 424, + 432, + 445, + 466, + 468, + 481, + 488, + 496, + 497, + 507, + 510, + 512, + 517, + 519, + 523, + 525, + 526, + 535, + 542, + 590, + 595, + 612, + 693, + 784, + 799, + 814, + 815, + 851, + 913, + 915 + ], + "weight": 905, + "height": 17, + "base_experience": 240 + } +} \ No newline at end of file diff --git a/example_data/charizard_moves.json b/example_data/charizard_moves.json new file mode 100644 index 0000000..6a06c3d --- /dev/null +++ b/example_data/charizard_moves.json @@ -0,0 +1,212 @@ +{ + "5": { + "id": 5, + "name": "mega-punch", + "type": "normal", + "power": 80, + "accuracy": 85, + "pp": 20, + "priority": 0, + "damage_class": "physical", + "effect_id": null, + "effect_chance": null, + "target": "selected-pokemon", + "description": "Inflicts regular damage with no additional effect." + }, + "7": { + "id": 7, + "name": "fire-punch", + "type": "fire", + "power": 75, + "accuracy": 100, + "pp": 15, + "priority": 0, + "damage_class": "physical", + "effect_id": null, + "effect_chance": 10, + "target": "selected-pokemon", + "description": "Has a 10% chance to burn the target." + }, + "9": { + "id": 9, + "name": "thunder-punch", + "type": "electric", + "power": 75, + "accuracy": 100, + "pp": 15, + "priority": 0, + "damage_class": "physical", + "effect_id": null, + "effect_chance": 10, + "target": "selected-pokemon", + "description": "Has a 10% chance to paralyze the target." + }, + "10": { + "id": 10, + "name": "scratch", + "type": "normal", + "power": 40, + "accuracy": 100, + "pp": 35, + "priority": 0, + "damage_class": "physical", + "effect_id": null, + "effect_chance": null, + "target": "selected-pokemon", + "description": "Inflicts regular damage with no additional effect." + }, + "17": { + "id": 17, + "name": "wing-attack", + "type": "flying", + "power": 60, + "accuracy": 100, + "pp": 35, + "priority": 0, + "damage_class": "physical", + "effect_id": null, + "effect_chance": null, + "target": "selected-pokemon", + "description": "Inflicts regular damage with no additional effect." + }, + "14": { + "id": 14, + "name": "swords-dance", + "type": "normal", + "power": null, + "accuracy": null, + "pp": 20, + "priority": 0, + "damage_class": "status", + "effect_id": null, + "effect_chance": null, + "target": "user", + "description": "Raises the user's Attack by two stages." + }, + "15": { + "id": 15, + "name": "cut", + "type": "normal", + "power": 50, + "accuracy": 95, + "pp": 30, + "priority": 0, + "damage_class": "physical", + "effect_id": null, + "effect_chance": null, + "target": "selected-pokemon", + "description": "Inflicts regular damage with no additional effect." + }, + "19": { + "id": 19, + "name": "fly", + "type": "flying", + "power": 90, + "accuracy": 95, + "pp": 15, + "priority": 0, + "damage_class": "physical", + "effect_id": null, + "effect_chance": null, + "target": "selected-pokemon", + "description": "User flies high into the air, dodging all attacks, and hits next turn." + }, + "25": { + "id": 25, + "name": "mega-kick", + "type": "normal", + "power": 120, + "accuracy": 75, + "pp": 5, + "priority": 0, + "damage_class": "physical", + "effect_id": null, + "effect_chance": null, + "target": "selected-pokemon", + "description": "Inflicts regular damage with no additional effect." + }, + "29": { + "id": 29, + "name": "headbutt", + "type": "normal", + "power": 70, + "accuracy": 100, + "pp": 15, + "priority": 0, + "damage_class": "physical", + "effect_id": null, + "effect_chance": 30, + "target": "selected-pokemon", + "description": "Has a 30% chance to make the target flinch." + }, + "34": { + "id": 34, + "name": "body-slam", + "type": "normal", + "power": 85, + "accuracy": 100, + "pp": 15, + "priority": 0, + "damage_class": "physical", + "effect_id": null, + "effect_chance": 30, + "target": "selected-pokemon", + "description": "Has a 30% chance to paralyze the target." + }, + "36": { + "id": 36, + "name": "take-down", + "type": "normal", + "power": 90, + "accuracy": 85, + "pp": 20, + "priority": 0, + "damage_class": "physical", + "effect_id": null, + "effect_chance": null, + "target": "selected-pokemon", + "description": "User receives 1/4 the damage it inflicts in recoil." + }, + "43": { + "id": 43, + "name": "leer", + "type": "normal", + "power": null, + "accuracy": 100, + "pp": 30, + "priority": 0, + "damage_class": "status", + "effect_id": null, + "effect_chance": 100, + "target": "all-opponents", + "description": "Lowers the target's Defense by one stage." + }, + "38": { + "id": 38, + "name": "double-edge", + "type": "normal", + "power": 120, + "accuracy": 100, + "pp": 15, + "priority": 0, + "damage_class": "physical", + "effect_id": null, + "effect_chance": null, + "target": "selected-pokemon", + "description": "User receives 1/3 the damage inflicted in recoil." + }, + "44": { + "id": 44, + "name": "bite", + "type": "dark", + "power": 60, + "accuracy": 100, + "pp": 25, + "priority": 0, + "damage_class": "physical", + "effect_id": null, + "effect_chance": 30, + "target": "selected-pokemon", + "description": "Has a 30% chance to make the target flinch." + } +} \ No newline at end of file diff --git a/example_data/classic_moves.json b/example_data/classic_moves.json new file mode 100644 index 0000000..04aa2be --- /dev/null +++ b/example_data/classic_moves.json @@ -0,0 +1,72 @@ +{ + "1": { + "id": 1, + "name": "pound", + "type": "normal", + "power": 40, + "accuracy": 100, + "pp": 35, + "priority": 0, + "damage_class": "physical", + "effect_id": null, + "effect_chance": null, + "target": "selected-pokemon", + "description": "Inflicts regular damage with no additional effect." + }, + "2": { + "id": 2, + "name": "karate-chop", + "type": "fighting", + "power": 50, + "accuracy": 100, + "pp": 25, + "priority": 0, + "damage_class": "physical", + "effect_id": null, + "effect_chance": null, + "target": "selected-pokemon", + "description": "Has an increased chance for a critical hit." + }, + "33": { + "id": 33, + "name": "tackle", + "type": "normal", + "power": 40, + "accuracy": 100, + "pp": 35, + "priority": 0, + "damage_class": "physical", + "effect_id": null, + "effect_chance": null, + "target": "selected-pokemon", + "description": "Inflicts regular damage with no additional effect." + }, + "34": { + "id": 34, + "name": "body-slam", + "type": "normal", + "power": 85, + "accuracy": 100, + "pp": 15, + "priority": 0, + "damage_class": "physical", + "effect_id": null, + "effect_chance": 30, + "target": "selected-pokemon", + "description": "Has a 30% chance to paralyze the target." + }, + "36": { + "id": 36, + "name": "take-down", + "type": "normal", + "power": 90, + "accuracy": 85, + "pp": 20, + "priority": 0, + "damage_class": "physical", + "effect_id": null, + "effect_chance": null, + "target": "selected-pokemon", + "description": "User receives 1/4 the damage it inflicts in recoil." + } +} \ No newline at end of file diff --git a/example_data/starter_pokemon.json b/example_data/starter_pokemon.json new file mode 100644 index 0000000..789b600 --- /dev/null +++ b/example_data/starter_pokemon.json @@ -0,0 +1,342 @@ +{ + "2": { + "id": 2, + "name": "ivysaur", + "types": [ + "grass", + "poison" + ], + "base_stats": { + "hp": 60, + "attack": 62, + "defense": 63, + "special_attack": 80, + "special_defense": 80, + "speed": 60 + }, + "abilities": [ + "overgrow", + "chlorophyll" + ], + "moves": [ + 14, + 15, + 20, + 22, + 29, + 33, + 34, + 36, + 38, + 45, + 46, + 70, + 72, + 73, + 74, + 75, + 76, + 77, + 79, + 80, + 81, + 92, + 99, + 102, + 104, + 111, + 113, + 115, + 117, + 133, + 148, + 156, + 164, + 173, + 174, + 182, + 188, + 189, + 200, + 202, + 203, + 204, + 206, + 207, + 210, + 213, + 214, + 216, + 218, + 219, + 230, + 235, + 237, + 241, + 249, + 263, + 267, + 270, + 275, + 282, + 290, + 311, + 331, + 345, + 363, + 388, + 402, + 412, + 437, + 438, + 445, + 447, + 474, + 491, + 496, + 497, + 520, + 526, + 580, + 590, + 803, + 851, + 885 + ], + "weight": 130, + "height": 10, + "base_experience": 142 + }, + "1": { + "id": 1, + "name": "bulbasaur", + "types": [ + "grass", + "poison" + ], + "base_stats": { + "hp": 45, + "attack": 49, + "defense": 49, + "special_attack": 65, + "special_defense": 65, + "speed": 45 + }, + "abilities": [ + "overgrow", + "chlorophyll" + ], + "moves": [ + 13, + 14, + 15, + 20, + 22, + 29, + 33, + 34, + 36, + 38, + 45, + 70, + 72, + 73, + 74, + 75, + 76, + 77, + 79, + 80, + 81, + 92, + 99, + 102, + 104, + 111, + 113, + 115, + 117, + 124, + 130, + 133, + 148, + 156, + 164, + 173, + 174, + 182, + 188, + 189, + 200, + 202, + 203, + 204, + 206, + 207, + 210, + 213, + 214, + 216, + 218, + 219, + 230, + 235, + 237, + 241, + 249, + 263, + 267, + 270, + 275, + 282, + 290, + 311, + 320, + 331, + 345, + 363, + 388, + 402, + 412, + 437, + 438, + 445, + 447, + 474, + 491, + 496, + 497, + 520, + 526, + 580, + 590, + 803, + 851, + 885 + ], + "weight": 69, + "height": 7, + "base_experience": 64 + }, + "3": { + "id": 3, + "name": "venusaur", + "types": [ + "grass", + "poison" + ], + "base_stats": { + "hp": 80, + "attack": 82, + "defense": 83, + "special_attack": 100, + "special_defense": 100, + "speed": 80 + }, + "abilities": [ + "overgrow", + "chlorophyll" + ], + "moves": [ + 14, + 15, + 20, + 22, + 29, + 33, + 34, + 36, + 38, + 45, + 46, + 63, + 70, + 72, + 73, + 74, + 75, + 76, + 77, + 79, + 80, + 81, + 89, + 92, + 99, + 102, + 104, + 111, + 113, + 115, + 117, + 133, + 148, + 156, + 164, + 173, + 174, + 182, + 184, + 188, + 189, + 200, + 202, + 203, + 204, + 206, + 207, + 210, + 213, + 214, + 216, + 218, + 219, + 230, + 235, + 237, + 241, + 249, + 263, + 267, + 270, + 275, + 282, + 290, + 311, + 331, + 335, + 338, + 345, + 363, + 388, + 398, + 402, + 412, + 414, + 416, + 431, + 437, + 438, + 445, + 447, + 474, + 491, + 496, + 497, + 520, + 523, + 526, + 572, + 580, + 590, + 707, + 803, + 805, + 851, + 885 + ], + "weight": 1000, + "height": 20, + "base_experience": 236 + } +} \ No newline at end of file diff --git a/example_data/type_chart.json b/example_data/type_chart.json new file mode 100644 index 0000000..ec16888 --- /dev/null +++ b/example_data/type_chart.json @@ -0,0 +1,412 @@ +[ + { + "attacking_type": "normal", + "defending_type": "rock", + "damage_factor": 0.5 + }, + { + "attacking_type": "normal", + "defending_type": "ghost", + "damage_factor": 0.0 + }, + { + "attacking_type": "fire", + "defending_type": "bug", + "damage_factor": 2.0 + }, + { + "attacking_type": "fire", + "defending_type": "grass", + "damage_factor": 2.0 + }, + { + "attacking_type": "fire", + "defending_type": "ice", + "damage_factor": 2.0 + }, + { + "attacking_type": "fire", + "defending_type": "rock", + "damage_factor": 0.5 + }, + { + "attacking_type": "fire", + "defending_type": "fire", + "damage_factor": 0.5 + }, + { + "attacking_type": "fire", + "defending_type": "water", + "damage_factor": 0.5 + }, + { + "attacking_type": "fire", + "defending_type": "dragon", + "damage_factor": 0.5 + }, + { + "attacking_type": "water", + "defending_type": "ground", + "damage_factor": 2.0 + }, + { + "attacking_type": "water", + "defending_type": "rock", + "damage_factor": 2.0 + }, + { + "attacking_type": "water", + "defending_type": "fire", + "damage_factor": 2.0 + }, + { + "attacking_type": "water", + "defending_type": "water", + "damage_factor": 0.5 + }, + { + "attacking_type": "water", + "defending_type": "grass", + "damage_factor": 0.5 + }, + { + "attacking_type": "water", + "defending_type": "dragon", + "damage_factor": 0.5 + }, + { + "attacking_type": "electric", + "defending_type": "flying", + "damage_factor": 2.0 + }, + { + "attacking_type": "electric", + "defending_type": "water", + "damage_factor": 2.0 + }, + { + "attacking_type": "electric", + "defending_type": "grass", + "damage_factor": 0.5 + }, + { + "attacking_type": "electric", + "defending_type": "electric", + "damage_factor": 0.5 + }, + { + "attacking_type": "electric", + "defending_type": "dragon", + "damage_factor": 0.5 + }, + { + "attacking_type": "electric", + "defending_type": "ground", + "damage_factor": 0.0 + }, + { + "attacking_type": "grass", + "defending_type": "ground", + "damage_factor": 2.0 + }, + { + "attacking_type": "grass", + "defending_type": "rock", + "damage_factor": 2.0 + }, + { + "attacking_type": "grass", + "defending_type": "water", + "damage_factor": 2.0 + }, + { + "attacking_type": "grass", + "defending_type": "flying", + "damage_factor": 0.5 + }, + { + "attacking_type": "grass", + "defending_type": "poison", + "damage_factor": 0.5 + }, + { + "attacking_type": "grass", + "defending_type": "bug", + "damage_factor": 0.5 + }, + { + "attacking_type": "grass", + "defending_type": "fire", + "damage_factor": 0.5 + }, + { + "attacking_type": "grass", + "defending_type": "grass", + "damage_factor": 0.5 + }, + { + "attacking_type": "grass", + "defending_type": "dragon", + "damage_factor": 0.5 + }, + { + "attacking_type": "ice", + "defending_type": "flying", + "damage_factor": 2.0 + }, + { + "attacking_type": "ice", + "defending_type": "ground", + "damage_factor": 2.0 + }, + { + "attacking_type": "ice", + "defending_type": "grass", + "damage_factor": 2.0 + }, + { + "attacking_type": "ice", + "defending_type": "dragon", + "damage_factor": 2.0 + }, + { + "attacking_type": "ice", + "defending_type": "fire", + "damage_factor": 0.5 + }, + { + "attacking_type": "ice", + "defending_type": "water", + "damage_factor": 0.5 + }, + { + "attacking_type": "ice", + "defending_type": "ice", + "damage_factor": 0.5 + }, + { + "attacking_type": "fighting", + "defending_type": "normal", + "damage_factor": 2.0 + }, + { + "attacking_type": "fighting", + "defending_type": "rock", + "damage_factor": 2.0 + }, + { + "attacking_type": "fighting", + "defending_type": "ice", + "damage_factor": 2.0 + }, + { + "attacking_type": "fighting", + "defending_type": "flying", + "damage_factor": 0.5 + }, + { + "attacking_type": "fighting", + "defending_type": "poison", + "damage_factor": 0.5 + }, + { + "attacking_type": "fighting", + "defending_type": "bug", + "damage_factor": 0.5 + }, + { + "attacking_type": "fighting", + "defending_type": "psychic", + "damage_factor": 0.5 + }, + { + "attacking_type": "fighting", + "defending_type": "ghost", + "damage_factor": 0.0 + }, + { + "attacking_type": "poison", + "defending_type": "grass", + "damage_factor": 2.0 + }, + { + "attacking_type": "poison", + "defending_type": "poison", + "damage_factor": 0.5 + }, + { + "attacking_type": "poison", + "defending_type": "ground", + "damage_factor": 0.5 + }, + { + "attacking_type": "poison", + "defending_type": "rock", + "damage_factor": 0.5 + }, + { + "attacking_type": "poison", + "defending_type": "ghost", + "damage_factor": 0.5 + }, + { + "attacking_type": "ground", + "defending_type": "poison", + "damage_factor": 2.0 + }, + { + "attacking_type": "ground", + "defending_type": "rock", + "damage_factor": 2.0 + }, + { + "attacking_type": "ground", + "defending_type": "fire", + "damage_factor": 2.0 + }, + { + "attacking_type": "ground", + "defending_type": "electric", + "damage_factor": 2.0 + }, + { + "attacking_type": "ground", + "defending_type": "bug", + "damage_factor": 0.5 + }, + { + "attacking_type": "ground", + "defending_type": "grass", + "damage_factor": 0.5 + }, + { + "attacking_type": "ground", + "defending_type": "flying", + "damage_factor": 0.0 + }, + { + "attacking_type": "flying", + "defending_type": "fighting", + "damage_factor": 2.0 + }, + { + "attacking_type": "flying", + "defending_type": "bug", + "damage_factor": 2.0 + }, + { + "attacking_type": "flying", + "defending_type": "grass", + "damage_factor": 2.0 + }, + { + "attacking_type": "flying", + "defending_type": "rock", + "damage_factor": 0.5 + }, + { + "attacking_type": "flying", + "defending_type": "electric", + "damage_factor": 0.5 + }, + { + "attacking_type": "psychic", + "defending_type": "fighting", + "damage_factor": 2.0 + }, + { + "attacking_type": "psychic", + "defending_type": "poison", + "damage_factor": 2.0 + }, + { + "attacking_type": "psychic", + "defending_type": "psychic", + "damage_factor": 0.5 + }, + { + "attacking_type": "bug", + "defending_type": "grass", + "damage_factor": 2.0 + }, + { + "attacking_type": "bug", + "defending_type": "psychic", + "damage_factor": 2.0 + }, + { + "attacking_type": "bug", + "defending_type": "fighting", + "damage_factor": 0.5 + }, + { + "attacking_type": "bug", + "defending_type": "flying", + "damage_factor": 0.5 + }, + { + "attacking_type": "bug", + "defending_type": "poison", + "damage_factor": 0.5 + }, + { + "attacking_type": "bug", + "defending_type": "ghost", + "damage_factor": 0.5 + }, + { + "attacking_type": "bug", + "defending_type": "fire", + "damage_factor": 0.5 + }, + { + "attacking_type": "rock", + "defending_type": "flying", + "damage_factor": 2.0 + }, + { + "attacking_type": "rock", + "defending_type": "bug", + "damage_factor": 2.0 + }, + { + "attacking_type": "rock", + "defending_type": "fire", + "damage_factor": 2.0 + }, + { + "attacking_type": "rock", + "defending_type": "ice", + "damage_factor": 2.0 + }, + { + "attacking_type": "rock", + "defending_type": "fighting", + "damage_factor": 0.5 + }, + { + "attacking_type": "rock", + "defending_type": "ground", + "damage_factor": 0.5 + }, + { + "attacking_type": "ghost", + "defending_type": "ghost", + "damage_factor": 2.0 + }, + { + "attacking_type": "ghost", + "defending_type": "psychic", + "damage_factor": 2.0 + }, + { + "attacking_type": "ghost", + "defending_type": "normal", + "damage_factor": 0.0 + }, + { + "attacking_type": "dragon", + "defending_type": "dragon", + "damage_factor": 2.0 + } +] \ No newline at end of file diff --git a/tools/data/README.md b/tools/data/README.md new file mode 100644 index 0000000..4f33b17 --- /dev/null +++ b/tools/data/README.md @@ -0,0 +1,305 @@ +# Pokemon Data Downloader + +A comprehensive tool for downloading Pokemon battle data from the PokeAPI using the `pokebase` library. This tool supports segmented downloading, data validation, and exports data in JSON format optimized for the C++ Pokemon battle simulator. + +## Features + +- **Segmented Downloads**: Download specific ranges of Pokemon or moves for testing and incremental data collection +- **Concurrent Processing**: Multi-threaded downloads with configurable worker counts +- **Data Validation**: Built-in JSON schema validation for data integrity +- **Rate Limiting**: Respectful API usage with automatic rate limiting +- **Progress Tracking**: Beautiful progress bars and detailed logging +- **Battle-Ready Data**: Exports complete Pokemon stats, moves, types, and effectiveness data +- **CLI Interface**: Easy-to-use command-line interface with multiple commands + +## Installation + +1. Install dependencies: +```bash +cd /testbed +pip install -r tools/requirements.txt +``` + +2. The tool is ready to use! No additional setup required. + +## Quick Start + +### Download a Small Set of Pokemon (Testing) + +```bash +# Download first 5 Pokemon with their moves +python -m tools.data.pokemon_downloader download-pokemon --start 1 --end 5 --include-moves + +# Download specific Pokemon (Pikachu) +python -m tools.data.pokemon_downloader download-pokemon --start 25 --end 25 --include-moves +``` + +### Download Specific Moves + +```bash +# Download first 10 moves +python -m tools.data.pokemon_downloader download-moves --move-ids "1,2,3,4,5,6,7,8,9,10" +``` + +### Download Type Effectiveness Data + +```bash +# Download complete type effectiveness chart +python -m tools.data.pokemon_downloader download-types +``` + +### Download Complete Gen 1 Dataset + +```bash +# Download all Gen 1 Pokemon (1-151) with moves and type data +python -m tools.data.pokemon_downloader download-complete --start 1 --end 151 +``` + +## CLI Commands + +### Global Options + +- `--output-dir`: Directory to save downloaded data (default: `data`) +- `--cache-dir`: Directory for API response caching (default: `.cache`) +- `--no-validation`: Disable data validation before saving + +### Commands + +#### `download-pokemon` +Download Pokemon data for a specific ID range. + +```bash +python -m tools.data.pokemon_downloader download-pokemon [OPTIONS] +``` + +Options: +- `--start`: Starting Pokemon ID (default: 1) +- `--end`: Ending Pokemon ID (default: 10) +- `--workers`: Number of concurrent workers (default: 5) +- `--include-moves`: Also download moves for these Pokemon + +#### `download-moves` +Download specific moves by ID. + +```bash +python -m tools.data.pokemon_downloader download-moves --move-ids "1,2,3,4,5" +``` + +Options: +- `--move-ids`: Comma-separated list of move IDs +- `--workers`: Number of concurrent workers (default: 5) + +#### `download-types` +Download type effectiveness data. + +```bash +python -m tools.data.pokemon_downloader download-types +``` + +#### `download-complete` +Download complete dataset (Pokemon, moves, and type effectiveness). + +```bash +python -m tools.data.pokemon_downloader download-complete [OPTIONS] +``` + +Options: +- `--start`: Starting Pokemon ID (default: 1) +- `--end`: Ending Pokemon ID (default: 151 for Gen 1) +- `--workers`: Number of concurrent workers (default: 5) + +## Data Structure + +### Pokemon Data Format + +```json +{ + "1": { + "id": 1, + "name": "bulbasaur", + "types": ["grass", "poison"], + "base_stats": { + "hp": 45, + "attack": 49, + "defense": 49, + "special_attack": 65, + "special_defense": 65, + "speed": 45 + }, + "abilities": ["overgrow", "chlorophyll"], + "moves": [1, 2, 3, 4, ...], + "weight": 69, + "height": 7, + "base_experience": 64 + } +} +``` + +### Move Data Format + +```json +{ + "1": { + "id": 1, + "name": "pound", + "type": "normal", + "power": 40, + "accuracy": 100, + "pp": 35, + "priority": 0, + "damage_class": "physical", + "effect_id": null, + "effect_chance": null, + "target": "selected-pokemon", + "description": "Inflicts regular damage." + } +} +``` + +### Type Effectiveness Format + +```json +[ + { + "attacking_type": "fire", + "defending_type": "grass", + "damage_factor": 2.0 + }, + { + "attacking_type": "water", + "defending_type": "fire", + "damage_factor": 2.0 + } +] +``` + +## Examples + +### Basic Usage (Python API) + +```python +from pathlib import Path +from tools.data.pokemon_downloader import PokemonDownloader + +# Initialize downloader +downloader = PokemonDownloader( + output_dir=Path("my_pokemon_data"), + validate_data=True +) + +# Download first 10 Pokemon +pokemon_data = downloader.download_pokemon_batch(1, 10) +downloader.save_pokemon_data(pokemon_data, "starter_pokemon.json") + +# Download some moves +move_ids = [1, 2, 3, 4, 5] # Pound, Karate Chop, etc. +moves_data = downloader.download_moves_batch(move_ids) +downloader.save_moves_data(moves_data, "basic_moves.json") + +# Download type effectiveness +effectiveness = downloader.download_type_effectiveness() +downloader.save_type_effectiveness(effectiveness, "types.json") +``` + +### Testing Small Segments + +```python +# Test with just 3 Pokemon +python tools/data/test_downloader.py +``` + +### Custom Data Validation + +```python +from tools.data.schemas import DataValidator + +validator = DataValidator() + +# Validate Pokemon data +errors = validator.validate_pokemon_collection(pokemon_data) +if errors: + print("Validation errors:", errors) +``` + +## Performance Considerations + +- **Rate Limiting**: The tool implements 100ms delays between API calls to be respectful +- **Concurrent Workers**: Default of 5 workers balances speed with API courtesy +- **Caching**: API responses are cached to avoid redundant requests +- **Memory Usage**: Large datasets are processed in batches to manage memory + +## Recommended Usage Patterns + +### For Development/Testing +```bash +# Start small - download just a few Pokemon +python -m tools.data.pokemon_downloader download-pokemon --start 1 --end 3 --include-moves + +# Test specific Pokemon you're interested in +python -m tools.data.pokemon_downloader download-pokemon --start 25 --end 25 --include-moves # Pikachu +``` + +### For Production Data +```bash +# Download by generations or batches +python -m tools.data.pokemon_downloader download-pokemon --start 1 --end 50 --include-moves +python -m tools.data.pokemon_downloader download-pokemon --start 51 --end 100 --include-moves +python -m tools.data.pokemon_downloader download-pokemon --start 101 --end 151 --include-moves + +# Always download type effectiveness data +python -m tools.data.pokemon_downloader download-types +``` + +### For Complete Gen 1 Dataset +```bash +# One command for everything (will take several minutes) +python -m tools.data.pokemon_downloader download-complete --start 1 --end 151 +``` + +## Data Validation + +The tool includes comprehensive JSON schema validation: + +- **Pokemon Data**: Validates stats, types, abilities, and structure +- **Move Data**: Validates power, accuracy, PP, and damage classes +- **Type Effectiveness**: Validates damage multipliers and type names +- **Generation 1 Focus**: Ensures only valid Gen 1 types and data + +Validation errors are displayed during save operations but don't prevent saving (warnings only). + +## Integration with C++ Battle Simulator + +The exported JSON files are designed to be easily consumed by the C++ battle simulator: + +1. **Pokemon Data**: Direct mapping to Pokemon class properties +2. **Move Data**: Complete move information for battle calculations +3. **Type Effectiveness**: Lookup table for damage calculations +4. **Consistent IDs**: All data uses consistent Pokemon and move IDs + +## Troubleshooting + +### Common Issues + +1. **Network Errors**: The tool retries failed requests automatically +2. **Rate Limiting**: Built-in delays prevent API rate limiting +3. **Memory Usage**: Large downloads are processed in batches +4. **Validation Warnings**: Usually safe to ignore, indicate minor data inconsistencies + +### Getting Help + +- Run tests: `python tools/data/test_downloader.py` +- Check logs: The tool provides detailed logging for debugging +- Validate data: Use `--no-validation` flag if validation is too strict + +## Contributing + +To extend the downloader: + +1. Add new data structures to `pokemon_downloader.py` +2. Update validation schemas in `schemas.py` +3. Add tests to `test_downloader.py` +4. Update this documentation + +## License + +This tool is part of the Pokemon Battle Simulator project and follows the same license terms. diff --git a/tools/data/example_usage.py b/tools/data/example_usage.py new file mode 100644 index 0000000..b17e5b8 --- /dev/null +++ b/tools/data/example_usage.py @@ -0,0 +1,228 @@ +#!/usr/bin/env python3 +""" +Example usage of the Pokemon Data Downloader. + +This script demonstrates various ways to use the Pokemon downloader +for different scenarios and use cases. +""" + +import sys +from pathlib import Path + +# Add the tools directory to Python path for imports +sys.path.insert(0, str(Path(__file__).parent.parent)) + +from data.pokemon_downloader import PokemonDownloader +from data.schemas import DataValidator +from rich.console import Console +from rich.panel import Panel +from rich.table import Table + +console = Console() + + +def example_basic_download(): + """Example 1: Basic Pokemon download.""" + console.print(Panel.fit("Example 1: Basic Pokemon Download", style="bold blue")) + + # Create downloader with custom output directory + downloader = PokemonDownloader(output_dir=Path("example_data")) + + # Download first 3 Pokemon (Bulbasaur, Ivysaur, Venusaur) + console.print("Downloading first 3 Pokemon...") + pokemon_data = downloader.download_pokemon_batch(1, 3, max_workers=2) + + if pokemon_data: + # Save to file + downloader.save_pokemon_data(pokemon_data, "starter_pokemon.json") + + # Display summary + table = downloader.get_stats_summary(pokemon_data) + console.print(table) + + console.print("✅ Basic download complete!\n") + + +def example_moves_download(): + """Example 2: Download specific moves.""" + console.print(Panel.fit("Example 2: Download Specific Moves", style="bold blue")) + + downloader = PokemonDownloader(output_dir=Path("example_data")) + + # Download some classic moves + classic_moves = [1, 2, 33, 34, 36] # Pound, Karate Chop, Tackle, Body Slam, Take Down + console.print(f"Downloading {len(classic_moves)} classic moves...") + + moves_data = downloader.download_moves_batch(classic_moves, max_workers=3) + + if moves_data: + downloader.save_moves_data(moves_data, "classic_moves.json") + + # Show move details + table = Table(title="Downloaded Moves") + table.add_column("Name", style="cyan") + table.add_column("Type", style="green") + table.add_column("Power", style="magenta") + table.add_column("Accuracy", style="yellow") + + for move in moves_data.values(): + power_str = str(move.power) if move.power else "—" + accuracy_str = str(move.accuracy) if move.accuracy else "—" + table.add_row( + move.name.title(), + move.type.title(), + power_str, + accuracy_str + ) + + console.print(table) + + console.print("✅ Moves download complete!\n") + + +def example_validation(): + """Example 3: Data validation.""" + console.print(Panel.fit("Example 3: Data Validation", style="bold blue")) + + validator = DataValidator() + + # Create sample Pokemon data for validation + sample_pokemon = { + "25": { # Pikachu + "id": 25, + "name": "pikachu", + "types": ["electric"], + "base_stats": { + "hp": 35, + "attack": 55, + "defense": 40, + "special_attack": 50, + "special_defense": 50, + "speed": 90 + }, + "abilities": ["static", "lightning-rod"], + "moves": [1, 2, 3, 4, 5], + "weight": 60, + "height": 4, + "base_experience": 112 + } + } + + # Validate the data + errors = validator.validate_pokemon_collection(sample_pokemon) + + if errors: + console.print(f"❌ Validation found {len(errors)} errors:") + for error in errors: + console.print(f" - {error}") + else: + console.print("✅ Sample Pokemon data is valid!") + + console.print("✅ Validation example complete!\n") + + +def example_type_effectiveness(): + """Example 4: Download type effectiveness data.""" + console.print(Panel.fit("Example 4: Type Effectiveness", style="bold blue")) + + downloader = PokemonDownloader(output_dir=Path("example_data")) + + console.print("Downloading type effectiveness data...") + effectiveness_data = downloader.download_type_effectiveness() + + if effectiveness_data: + downloader.save_type_effectiveness(effectiveness_data, "type_chart.json") + + # Show some interesting type matchups + table = Table(title="Sample Type Effectiveness") + table.add_column("Attacking Type", style="cyan") + table.add_column("Defending Type", style="green") + table.add_column("Effectiveness", style="magenta") + + # Show super effective matchups + super_effective = [e for e in effectiveness_data if e.damage_factor == 2.0][:5] + for entry in super_effective: + table.add_row( + entry.attacking_type.title(), + entry.defending_type.title(), + "Super Effective (2x)" + ) + + console.print(table) + console.print(f"Total effectiveness entries: {len(effectiveness_data)}") + + console.print("✅ Type effectiveness download complete!\n") + + +def example_integrated_workflow(): + """Example 5: Integrated workflow - Pokemon with their moves.""" + console.print(Panel.fit("Example 5: Integrated Workflow", style="bold blue")) + + downloader = PokemonDownloader(output_dir=Path("example_data")) + + # Download a specific Pokemon (Charizard) + console.print("Downloading Charizard (ID: 6)...") + pokemon_data = downloader.download_pokemon_batch(6, 6, max_workers=1) + + if pokemon_data: + charizard = pokemon_data[6] + console.print(f"✅ Downloaded {charizard.name.title()}") + console.print(f" - Types: {', '.join(charizard.types)}") + console.print(f" - Base stats total: {sum(charizard.base_stats.__dict__.values())}") + console.print(f" - Can learn {len(charizard.moves)} moves") + + # Download first 15 moves that Charizard can learn + charizard_moves = charizard.moves[:15] + console.print(f"\nDownloading {len(charizard_moves)} of Charizard's moves...") + + moves_data = downloader.download_moves_batch(charizard_moves, max_workers=3) + + if moves_data: + # Save both datasets + downloader.save_pokemon_data(pokemon_data, "charizard.json") + downloader.save_moves_data(moves_data, "charizard_moves.json") + + # Show some moves + console.print("\nSample moves Charizard can learn:") + for move_id, move in list(moves_data.items())[:5]: + power_str = f"{move.power} power" if move.power else "status move" + console.print(f" - {move.name.title()} ({move.type} type, {power_str})") + + console.print("✅ Integrated workflow complete!\n") + + +def main(): + """Run all examples.""" + console.print(Panel.fit( + "🔥 Pokemon Data Downloader Examples", + style="bold red" + )) + + console.print("This script demonstrates various usage patterns for the Pokemon downloader.\n") + + # Run examples + try: + example_basic_download() + example_moves_download() + example_validation() + example_type_effectiveness() + example_integrated_workflow() + + console.print(Panel.fit( + "🎉 All examples completed successfully!\n\n" + "Check the 'example_data' directory for downloaded files.\n" + "You can now use these patterns in your own projects.", + style="bold green" + )) + + except KeyboardInterrupt: + console.print("\n⚠️ Examples interrupted by user") + except Exception as e: + console.print(f"\n❌ Error running examples: {e}") + return 1 + + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/tools/data/pokemon_downloader.py b/tools/data/pokemon_downloader.py new file mode 100644 index 0000000..29d6487 --- /dev/null +++ b/tools/data/pokemon_downloader.py @@ -0,0 +1,640 @@ +#!/usr/bin/env python3 +""" +Pokemon Data Downloader + +This module provides functionality to download Pokemon data from the PokeAPI +using the pokebase library. It supports segmented downloading to allow for +efficient data management and testing with smaller datasets. + +Features: +- Download Pokemon species data (stats, types, abilities) +- Download move data (power, accuracy, effects, type) +- Download type effectiveness data +- Segmented downloading by ID ranges +- Data validation and caching +- Progress tracking with rich progress bars +- Export to JSON format for C++ integration +""" + +import json +import logging +import time +from dataclasses import dataclass, asdict +from pathlib import Path +from typing import Dict, List, Optional, Set, Tuple, Any +from concurrent.futures import ThreadPoolExecutor, as_completed +import threading + +import pokebase as pb +from rich.console import Console +from rich.progress import Progress, TaskID, SpinnerColumn, TextColumn, BarColumn, TimeRemainingColumn +from rich.table import Table +from rich.panel import Panel +import click + +from .schemas import DataValidator + +# Configure logging +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + +console = Console() + + +@dataclass +class PokemonStats: + """Pokemon base stats structure.""" + hp: int + attack: int + defense: int + special_attack: int + special_defense: int + speed: int + + +@dataclass +class PokemonData: + """Complete Pokemon data structure for battle simulation.""" + id: int + name: str + types: List[str] + base_stats: PokemonStats + abilities: List[str] + moves: List[int] # Move IDs that this Pokemon can learn + weight: int + height: int + base_experience: int + + +@dataclass +class MoveData: + """Move data structure for battle simulation.""" + id: int + name: str + type: str + power: Optional[int] + accuracy: Optional[int] + pp: int + priority: int + damage_class: str # physical, special, or status + effect_id: Optional[int] + effect_chance: Optional[int] + target: str + description: str + + +@dataclass +class TypeEffectiveness: + """Type effectiveness data structure.""" + attacking_type: str + defending_type: str + damage_factor: float # 0.0, 0.5, 1.0, 2.0 + + +class PokemonDownloader: + """Main class for downloading Pokemon data from PokeAPI.""" + + def __init__(self, output_dir: Path = Path("data"), cache_dir: Path = Path(".cache"), validate_data: bool = True): + """ + Initialize the Pokemon downloader. + + Args: + output_dir: Directory to save the downloaded data + cache_dir: Directory for caching API responses + validate_data: Whether to validate data before saving + """ + self.output_dir = Path(output_dir) + self.cache_dir = Path(cache_dir) + self.output_dir.mkdir(exist_ok=True) + self.cache_dir.mkdir(exist_ok=True) + + # Data validation + self.validate_data = validate_data + self.validator = DataValidator() if validate_data else None + + # Thread safety for concurrent downloads + self._lock = threading.Lock() + self._downloaded_pokemon: Set[int] = set() + self._downloaded_moves: Set[int] = set() + + # Rate limiting + self._last_request_time = 0.0 + self._min_request_interval = 0.1 # 100ms between requests + + def _rate_limit(self): + """Implement simple rate limiting to be respectful to the API.""" + with self._lock: + current_time = time.time() + time_since_last = current_time - self._last_request_time + if time_since_last < self._min_request_interval: + time.sleep(self._min_request_interval - time_since_last) + self._last_request_time = time.time() + + def _safe_api_call(self, func, *args, **kwargs): + """Make a safe API call with rate limiting and error handling.""" + self._rate_limit() + max_retries = 3 + for attempt in range(max_retries): + try: + return func(*args, **kwargs) + except Exception as e: + if attempt == max_retries - 1: + logger.error(f"API call failed after {max_retries} attempts: {e}") + raise + logger.warning(f"API call attempt {attempt + 1} failed, retrying: {e}") + time.sleep(1.0 * (attempt + 1)) # Exponential backoff + + def download_pokemon(self, pokemon_id: int) -> Optional[PokemonData]: + """ + Download data for a single Pokemon. + + Args: + pokemon_id: The Pokemon ID to download + + Returns: + PokemonData object or None if download failed + """ + try: + # Download Pokemon species and Pokemon data + pokemon = self._safe_api_call(pb.pokemon, pokemon_id) + species = self._safe_api_call(pb.pokemon_species, pokemon_id) + + # Extract base stats + stats_map = {stat.stat.name: stat.base_stat for stat in pokemon.stats} + base_stats = PokemonStats( + hp=stats_map.get('hp', 0), + attack=stats_map.get('attack', 0), + defense=stats_map.get('defense', 0), + special_attack=stats_map.get('special-attack', 0), + special_defense=stats_map.get('special-defense', 0), + speed=stats_map.get('speed', 0) + ) + + # Extract types + types = [t.type.name for t in pokemon.types] + + # Extract abilities + abilities = [ability.ability.name for ability in pokemon.abilities] + + # Extract learnable moves (just IDs for now) + moves = [move.move.url.split('/')[-2] for move in pokemon.moves] + moves = [int(move_id) for move_id in moves if move_id.isdigit()] + + return PokemonData( + id=pokemon.id, + name=pokemon.name, + types=types, + base_stats=base_stats, + abilities=abilities, + moves=moves, + weight=pokemon.weight, + height=pokemon.height, + base_experience=pokemon.base_experience or 0 + ) + + except Exception as e: + logger.error(f"Failed to download Pokemon {pokemon_id}: {e}") + return None + + def download_move(self, move_id: int) -> Optional[MoveData]: + """ + Download data for a single move. + + Args: + move_id: The move ID to download + + Returns: + MoveData object or None if download failed + """ + try: + move = self._safe_api_call(pb.move, move_id) + + # Extract effect description (English) + description = "" + if move.effect_entries: + for entry in move.effect_entries: + if entry.language.name == 'en': + description = entry.short_effect or entry.effect or "" + break + + return MoveData( + id=move.id, + name=move.name, + type=move.type.name if move.type else "normal", + power=move.power, + accuracy=move.accuracy, + pp=move.pp or 0, + priority=move.priority or 0, + damage_class=move.damage_class.name if move.damage_class else "status", + effect_id=None, # Effect ID not directly available in this API version + effect_chance=move.effect_chance, + target=move.target.name if move.target else "selected-pokemon", + description=description + ) + + except Exception as e: + logger.error(f"Failed to download move {move_id}: {e}") + return None + + def download_type_effectiveness(self) -> List[TypeEffectiveness]: + """ + Download type effectiveness data. + + Returns: + List of TypeEffectiveness objects + """ + effectiveness_data = [] + + try: + # Get all types (Gen 1 has 15 types) + gen1_types = [ + 'normal', 'fire', 'water', 'electric', 'grass', 'ice', + 'fighting', 'poison', 'ground', 'flying', 'psychic', + 'bug', 'rock', 'ghost', 'dragon' + ] + + for type_name in gen1_types: + type_data = self._safe_api_call(pb.type_, type_name) + + # Double damage to + for relation in type_data.damage_relations.double_damage_to: + if relation.name in gen1_types: + effectiveness_data.append(TypeEffectiveness( + attacking_type=type_name, + defending_type=relation.name, + damage_factor=2.0 + )) + + # Half damage to + for relation in type_data.damage_relations.half_damage_to: + if relation.name in gen1_types: + effectiveness_data.append(TypeEffectiveness( + attacking_type=type_name, + defending_type=relation.name, + damage_factor=0.5 + )) + + # No damage to + for relation in type_data.damage_relations.no_damage_to: + if relation.name in gen1_types: + effectiveness_data.append(TypeEffectiveness( + attacking_type=type_name, + defending_type=relation.name, + damage_factor=0.0 + )) + + return effectiveness_data + + except Exception as e: + logger.error(f"Failed to download type effectiveness: {e}") + return [] + + def download_pokemon_batch(self, start_id: int, end_id: int, max_workers: int = 5) -> Dict[int, PokemonData]: + """ + Download a batch of Pokemon data concurrently. + + Args: + start_id: Starting Pokemon ID (inclusive) + end_id: Ending Pokemon ID (inclusive) + max_workers: Maximum number of concurrent downloads + + Returns: + Dictionary mapping Pokemon ID to PokemonData + """ + pokemon_data = {} + pokemon_ids = list(range(start_id, end_id + 1)) + + with Progress( + SpinnerColumn(), + TextColumn("[progress.description]{task.description}"), + BarColumn(), + TextColumn("[progress.percentage]{task.percentage:>3.0f}%"), + TimeRemainingColumn(), + console=console + ) as progress: + + task = progress.add_task( + f"Downloading Pokemon {start_id}-{end_id}", + total=len(pokemon_ids) + ) + + with ThreadPoolExecutor(max_workers=max_workers) as executor: + # Submit all download tasks + future_to_id = { + executor.submit(self.download_pokemon, pokemon_id): pokemon_id + for pokemon_id in pokemon_ids + } + + # Collect results as they complete + for future in as_completed(future_to_id): + pokemon_id = future_to_id[future] + try: + result = future.result() + if result: + pokemon_data[pokemon_id] = result + with self._lock: + self._downloaded_pokemon.add(pokemon_id) + except Exception as e: + logger.error(f"Pokemon {pokemon_id} download failed: {e}") + + progress.update(task, advance=1) + + return pokemon_data + + def download_moves_batch(self, move_ids: List[int], max_workers: int = 5) -> Dict[int, MoveData]: + """ + Download a batch of move data concurrently. + + Args: + move_ids: List of move IDs to download + max_workers: Maximum number of concurrent downloads + + Returns: + Dictionary mapping move ID to MoveData + """ + moves_data = {} + + with Progress( + SpinnerColumn(), + TextColumn("[progress.description]{task.description}"), + BarColumn(), + TextColumn("[progress.percentage]{task.percentage:>3.0f}%"), + TimeRemainingColumn(), + console=console + ) as progress: + + task = progress.add_task( + f"Downloading {len(move_ids)} moves", + total=len(move_ids) + ) + + with ThreadPoolExecutor(max_workers=max_workers) as executor: + # Submit all download tasks + future_to_id = { + executor.submit(self.download_move, move_id): move_id + for move_id in move_ids + } + + # Collect results as they complete + for future in as_completed(future_to_id): + move_id = future_to_id[future] + try: + result = future.result() + if result: + moves_data[move_id] = result + with self._lock: + self._downloaded_moves.add(move_id) + except Exception as e: + logger.error(f"Move {move_id} download failed: {e}") + + progress.update(task, advance=1) + + return moves_data + + def save_pokemon_data(self, pokemon_data: Dict[int, PokemonData], filename: str = "pokemon.json"): + """Save Pokemon data to JSON file with optional validation.""" + output_file = self.output_dir / filename + data_dict = {str(k): asdict(v) for k, v in pokemon_data.items()} + + # Validate data before saving if validation is enabled + if self.validate_data and self.validator: + errors = self.validator.validate_pokemon_collection(data_dict) + if errors: + console.print(f"⚠️ Validation warnings for {filename}:") + for error in errors[:10]: # Show first 10 errors + console.print(f" - {error}") + if len(errors) > 10: + console.print(f" ... and {len(errors) - 10} more errors") + + with open(output_file, 'w', encoding='utf-8') as f: + json.dump(data_dict, f, indent=2, ensure_ascii=False) + + console.print(f"✅ Saved {len(pokemon_data)} Pokemon to {output_file}") + + def save_moves_data(self, moves_data: Dict[int, MoveData], filename: str = "moves.json"): + """Save moves data to JSON file with optional validation.""" + output_file = self.output_dir / filename + data_dict = {str(k): asdict(v) for k, v in moves_data.items()} + + # Validate data before saving if validation is enabled + if self.validate_data and self.validator: + errors = self.validator.validate_move_collection(data_dict) + if errors: + console.print(f"⚠️ Validation warnings for {filename}:") + for error in errors[:10]: # Show first 10 errors + console.print(f" - {error}") + if len(errors) > 10: + console.print(f" ... and {len(errors) - 10} more errors") + + with open(output_file, 'w', encoding='utf-8') as f: + json.dump(data_dict, f, indent=2, ensure_ascii=False) + + console.print(f"✅ Saved {len(moves_data)} moves to {output_file}") + + def save_type_effectiveness(self, effectiveness_data: List[TypeEffectiveness], filename: str = "type_effectiveness.json"): + """Save type effectiveness data to JSON file with optional validation.""" + output_file = self.output_dir / filename + data_dict = [asdict(item) for item in effectiveness_data] + + # Validate data before saving if validation is enabled + if self.validate_data and self.validator: + errors = self.validator.validate_type_effectiveness(data_dict) + if errors: + console.print(f"⚠️ Validation warnings for {filename}:") + for error in errors[:10]: # Show first 10 errors + console.print(f" - {error}") + if len(errors) > 10: + console.print(f" ... and {len(errors) - 10} more errors") + + with open(output_file, 'w', encoding='utf-8') as f: + json.dump(data_dict, f, indent=2, ensure_ascii=False) + + console.print(f"✅ Saved {len(effectiveness_data)} type effectiveness entries to {output_file}") + + def get_stats_summary(self, pokemon_data: Dict[int, PokemonData]) -> Table: + """Generate a summary table of downloaded Pokemon data.""" + table = Table(title="Downloaded Pokemon Summary") + table.add_column("Metric", style="cyan") + table.add_column("Value", style="magenta") + + if not pokemon_data: + table.add_row("Total Pokemon", "0") + return table + + # Calculate statistics + total_pokemon = len(pokemon_data) + types_count = {} + total_moves = set() + + for pokemon in pokemon_data.values(): + for ptype in pokemon.types: + types_count[ptype] = types_count.get(ptype, 0) + 1 + total_moves.update(pokemon.moves) + + # Add rows to table + table.add_row("Total Pokemon", str(total_pokemon)) + table.add_row("Unique Moves Referenced", str(len(total_moves))) + table.add_row("Most Common Type", max(types_count, key=types_count.get) if types_count else "N/A") + table.add_row("ID Range", f"{min(pokemon_data.keys())}-{max(pokemon_data.keys())}") + + return table + + +# CLI Interface +@click.group() +@click.option('--output-dir', default='data', help='Output directory for downloaded data') +@click.option('--cache-dir', default='.cache', help='Cache directory for API responses') +@click.option('--no-validation', is_flag=True, help='Disable data validation') +@click.pass_context +def cli(ctx, output_dir, cache_dir, no_validation): + """Pokemon Data Downloader CLI.""" + ctx.ensure_object(dict) + ctx.obj['downloader'] = PokemonDownloader( + Path(output_dir), + Path(cache_dir), + validate_data=not no_validation + ) + + +@cli.command() +@click.option('--start', default=1, help='Starting Pokemon ID') +@click.option('--end', default=10, help='Ending Pokemon ID') +@click.option('--workers', default=5, help='Number of concurrent workers') +@click.option('--include-moves', is_flag=True, help='Also download moves for these Pokemon') +@click.pass_context +def download_pokemon(ctx, start, end, workers, include_moves): + """Download Pokemon data for a specific ID range.""" + downloader = ctx.obj['downloader'] + + console.print(Panel.fit( + f"🔽 Downloading Pokemon {start} to {end}", + style="bold blue" + )) + + # Download Pokemon data + pokemon_data = downloader.download_pokemon_batch(start, end, workers) + + if pokemon_data: + # Save Pokemon data + filename = f"pokemon_{start}_{end}.json" + downloader.save_pokemon_data(pokemon_data, filename) + + # Show summary + summary_table = downloader.get_stats_summary(pokemon_data) + console.print(summary_table) + + # Download moves if requested + if include_moves: + all_move_ids = set() + for pokemon in pokemon_data.values(): + all_move_ids.update(pokemon.moves) + + if all_move_ids: + console.print(f"\n🔽 Downloading {len(all_move_ids)} unique moves...") + moves_data = downloader.download_moves_batch(list(all_move_ids), workers) + if moves_data: + moves_filename = f"moves_{start}_{end}.json" + downloader.save_moves_data(moves_data, moves_filename) + else: + console.print("❌ No Pokemon data was successfully downloaded") + + +@cli.command() +@click.option('--move-ids', help='Comma-separated list of move IDs to download') +@click.option('--workers', default=5, help='Number of concurrent workers') +@click.pass_context +def download_moves(ctx, move_ids, workers): + """Download specific moves by ID.""" + downloader = ctx.obj['downloader'] + + if not move_ids: + console.print("❌ Please provide move IDs with --move-ids") + return + + try: + ids = [int(x.strip()) for x in move_ids.split(',')] + except ValueError: + console.print("❌ Invalid move IDs format. Use comma-separated integers.") + return + + console.print(Panel.fit( + f"🔽 Downloading {len(ids)} moves", + style="bold blue" + )) + + moves_data = downloader.download_moves_batch(ids, workers) + + if moves_data: + downloader.save_moves_data(moves_data, "custom_moves.json") + console.print(f"✅ Successfully downloaded {len(moves_data)} moves") + else: + console.print("❌ No move data was successfully downloaded") + + +@cli.command() +@click.pass_context +def download_types(ctx): + """Download type effectiveness data.""" + downloader = ctx.obj['downloader'] + + console.print(Panel.fit( + "🔽 Downloading type effectiveness data", + style="bold blue" + )) + + effectiveness_data = downloader.download_type_effectiveness() + + if effectiveness_data: + downloader.save_type_effectiveness(effectiveness_data) + console.print(f"✅ Successfully downloaded {len(effectiveness_data)} type effectiveness entries") + else: + console.print("❌ Failed to download type effectiveness data") + + +@cli.command() +@click.option('--start', default=1, help='Starting Pokemon ID') +@click.option('--end', default=151, help='Ending Pokemon ID (151 for Gen 1)') +@click.option('--workers', default=5, help='Number of concurrent workers') +@click.pass_context +def download_complete(ctx, start, end, workers): + """Download complete dataset (Pokemon, moves, and type effectiveness).""" + downloader = ctx.obj['downloader'] + + console.print(Panel.fit( + f"🔽 Downloading complete Pokemon dataset ({start}-{end})", + style="bold blue" + )) + + # Download Pokemon + pokemon_data = downloader.download_pokemon_batch(start, end, workers) + + if pokemon_data: + downloader.save_pokemon_data(pokemon_data, f"pokemon_complete_{start}_{end}.json") + + # Get all unique moves + all_move_ids = set() + for pokemon in pokemon_data.values(): + all_move_ids.update(pokemon.moves) + + # Download moves + if all_move_ids: + moves_data = downloader.download_moves_batch(list(all_move_ids), workers) + if moves_data: + downloader.save_moves_data(moves_data, f"moves_complete_{start}_{end}.json") + + # Download type effectiveness + effectiveness_data = downloader.download_type_effectiveness() + if effectiveness_data: + downloader.save_type_effectiveness(effectiveness_data, "type_effectiveness_complete.json") + + # Show final summary + summary_table = downloader.get_stats_summary(pokemon_data) + console.print(summary_table) + + console.print("🎉 Complete dataset download finished!") + else: + console.print("❌ Failed to download Pokemon data") + + +if __name__ == "__main__": + cli() diff --git a/tools/data/schemas.py b/tools/data/schemas.py new file mode 100644 index 0000000..f7c63dc --- /dev/null +++ b/tools/data/schemas.py @@ -0,0 +1,333 @@ +#!/usr/bin/env python3 +""" +Data validation schemas for Pokemon data. + +This module provides JSON schemas and validation functions for Pokemon data +downloaded from the PokeAPI. It ensures data consistency and catches errors +early in the data processing pipeline. +""" + +import json +import jsonschema +from typing import Dict, Any, List +from pathlib import Path + +# JSON Schema for Pokemon base stats +POKEMON_STATS_SCHEMA = { + "type": "object", + "properties": { + "hp": {"type": "integer", "minimum": 0, "maximum": 255}, + "attack": {"type": "integer", "minimum": 0, "maximum": 255}, + "defense": {"type": "integer", "minimum": 0, "maximum": 255}, + "special_attack": {"type": "integer", "minimum": 0, "maximum": 255}, + "special_defense": {"type": "integer", "minimum": 0, "maximum": 255}, + "speed": {"type": "integer", "minimum": 0, "maximum": 255} + }, + "required": ["hp", "attack", "defense", "special_attack", "special_defense", "speed"], + "additionalProperties": False +} + +# JSON Schema for individual Pokemon data +POKEMON_SCHEMA = { + "type": "object", + "properties": { + "id": {"type": "integer", "minimum": 1, "maximum": 1010}, # Current max Pokemon ID + "name": {"type": "string", "minLength": 1}, + "types": { + "type": "array", + "items": {"type": "string"}, + "minItems": 1, + "maxItems": 2, + "uniqueItems": True + }, + "base_stats": POKEMON_STATS_SCHEMA, + "abilities": { + "type": "array", + "items": {"type": "string"}, + "minItems": 1, + "uniqueItems": True + }, + "moves": { + "type": "array", + "items": {"type": "integer", "minimum": 1}, + "uniqueItems": True + }, + "weight": {"type": "integer", "minimum": 0}, + "height": {"type": "integer", "minimum": 0}, + "base_experience": {"type": "integer", "minimum": 0} + }, + "required": ["id", "name", "types", "base_stats", "abilities", "moves", "weight", "height", "base_experience"], + "additionalProperties": False +} + +# JSON Schema for Pokemon collection (multiple Pokemon) +POKEMON_COLLECTION_SCHEMA = { + "type": "object", + "patternProperties": { + "^[0-9]+$": POKEMON_SCHEMA # Keys must be numeric strings (Pokemon IDs) + }, + "additionalProperties": False +} + +# JSON Schema for individual move data +MOVE_SCHEMA = { + "type": "object", + "properties": { + "id": {"type": "integer", "minimum": 1}, + "name": {"type": "string", "minLength": 1}, + "type": {"type": "string", "minLength": 1}, + "power": {"type": ["integer", "null"], "minimum": 0, "maximum": 250}, + "accuracy": {"type": ["integer", "null"], "minimum": 0, "maximum": 100}, + "pp": {"type": "integer", "minimum": 0, "maximum": 64}, + "priority": {"type": "integer", "minimum": -7, "maximum": 5}, + "damage_class": { + "type": "string", + "enum": ["physical", "special", "status"] + }, + "effect_id": {"type": ["integer", "null"], "minimum": 1}, + "effect_chance": {"type": ["integer", "null"], "minimum": 0, "maximum": 100}, + "target": {"type": "string", "minLength": 1}, + "description": {"type": "string"} + }, + "required": ["id", "name", "type", "power", "accuracy", "pp", "priority", "damage_class", "effect_id", "effect_chance", "target", "description"], + "additionalProperties": False +} + +# JSON Schema for move collection +MOVE_COLLECTION_SCHEMA = { + "type": "object", + "patternProperties": { + "^[0-9]+$": MOVE_SCHEMA # Keys must be numeric strings (Move IDs) + }, + "additionalProperties": False +} + +# JSON Schema for type effectiveness entry +TYPE_EFFECTIVENESS_ENTRY_SCHEMA = { + "type": "object", + "properties": { + "attacking_type": {"type": "string", "minLength": 1}, + "defending_type": {"type": "string", "minLength": 1}, + "damage_factor": { + "type": "number", + "enum": [0.0, 0.5, 1.0, 2.0] # Only valid damage multipliers + } + }, + "required": ["attacking_type", "defending_type", "damage_factor"], + "additionalProperties": False +} + +# JSON Schema for type effectiveness collection +TYPE_EFFECTIVENESS_SCHEMA = { + "type": "array", + "items": TYPE_EFFECTIVENESS_ENTRY_SCHEMA, + "uniqueItems": True +} + +# Valid Generation 1 types for additional validation +GEN1_TYPES = { + 'normal', 'fire', 'water', 'electric', 'grass', 'ice', + 'fighting', 'poison', 'ground', 'flying', 'psychic', + 'bug', 'rock', 'ghost', 'dragon' +} + + +class DataValidator: + """Validator class for Pokemon data using JSON schemas.""" + + def __init__(self): + """Initialize the validator with compiled schemas.""" + self.pokemon_validator = jsonschema.Draft7Validator(POKEMON_SCHEMA) + self.pokemon_collection_validator = jsonschema.Draft7Validator(POKEMON_COLLECTION_SCHEMA) + self.move_validator = jsonschema.Draft7Validator(MOVE_SCHEMA) + self.move_collection_validator = jsonschema.Draft7Validator(MOVE_COLLECTION_SCHEMA) + self.type_effectiveness_validator = jsonschema.Draft7Validator(TYPE_EFFECTIVENESS_SCHEMA) + + def validate_pokemon(self, pokemon_data: Dict[str, Any]) -> List[str]: + """ + Validate a single Pokemon data entry. + + Args: + pokemon_data: Dictionary containing Pokemon data + + Returns: + List of validation error messages (empty if valid) + """ + errors = [] + + # Schema validation + for error in self.pokemon_validator.iter_errors(pokemon_data): + errors.append(f"Schema error: {error.message}") + + # Additional business logic validation + if 'types' in pokemon_data: + for ptype in pokemon_data['types']: + if ptype not in GEN1_TYPES: + errors.append(f"Invalid type '{ptype}' - not a Generation 1 type") + + # Validate stat totals are reasonable + if 'base_stats' in pokemon_data: + stats = pokemon_data['base_stats'] + total_stats = sum(stats.values()) + if total_stats < 180: # Minimum reasonable total (like Sunkern) + errors.append(f"Base stat total {total_stats} seems too low") + elif total_stats > 720: # Maximum reasonable total (like Arceus) + errors.append(f"Base stat total {total_stats} seems too high") + + return errors + + def validate_pokemon_collection(self, collection_data: Dict[str, Any]) -> List[str]: + """ + Validate a collection of Pokemon data. + + Args: + collection_data: Dictionary mapping Pokemon IDs to Pokemon data + + Returns: + List of validation error messages (empty if valid) + """ + errors = [] + + # Schema validation for the collection structure + for error in self.pokemon_collection_validator.iter_errors(collection_data): + errors.append(f"Collection schema error: {error.message}") + + # Validate each Pokemon individually + for pokemon_id, pokemon_data in collection_data.items(): + pokemon_errors = self.validate_pokemon(pokemon_data) + for error in pokemon_errors: + errors.append(f"Pokemon {pokemon_id}: {error}") + + return errors + + def validate_move(self, move_data: Dict[str, Any]) -> List[str]: + """ + Validate a single move data entry. + + Args: + move_data: Dictionary containing move data + + Returns: + List of validation error messages (empty if valid) + """ + errors = [] + + # Schema validation + for error in self.move_validator.iter_errors(move_data): + errors.append(f"Schema error: {error.message}") + + # Additional business logic validation + if 'type' in move_data: + if move_data['type'] not in GEN1_TYPES: + errors.append(f"Invalid move type '{move_data['type']}' - not a Generation 1 type") + + # Validate power/accuracy combinations make sense + if 'power' in move_data and 'damage_class' in move_data: + power = move_data['power'] + damage_class = move_data['damage_class'] + + if damage_class == 'status' and power is not None: + errors.append("Status moves should not have power") + elif damage_class in ['physical', 'special'] and power is None: + errors.append("Damaging moves should have power") + + return errors + + def validate_move_collection(self, collection_data: Dict[str, Any]) -> List[str]: + """ + Validate a collection of move data. + + Args: + collection_data: Dictionary mapping move IDs to move data + + Returns: + List of validation error messages (empty if valid) + """ + errors = [] + + # Schema validation for the collection structure + for error in self.move_collection_validator.iter_errors(collection_data): + errors.append(f"Collection schema error: {error.message}") + + # Validate each move individually + for move_id, move_data in collection_data.items(): + move_errors = self.validate_move(move_data) + for error in move_errors: + errors.append(f"Move {move_id}: {error}") + + return errors + + def validate_type_effectiveness(self, effectiveness_data: List[Dict[str, Any]]) -> List[str]: + """ + Validate type effectiveness data. + + Args: + effectiveness_data: List of type effectiveness entries + + Returns: + List of validation error messages (empty if valid) + """ + errors = [] + + # Schema validation + for error in self.type_effectiveness_validator.iter_errors(effectiveness_data): + errors.append(f"Schema error: {error.message}") + + # Additional validation + for i, entry in enumerate(effectiveness_data): + if 'attacking_type' in entry and entry['attacking_type'] not in GEN1_TYPES: + errors.append(f"Entry {i}: Invalid attacking type '{entry['attacking_type']}'") + + if 'defending_type' in entry and entry['defending_type'] not in GEN1_TYPES: + errors.append(f"Entry {i}: Invalid defending type '{entry['defending_type']}'") + + return errors + + def validate_file(self, file_path: Path, data_type: str) -> List[str]: + """ + Validate a JSON data file. + + Args: + file_path: Path to the JSON file + data_type: Type of data ('pokemon', 'moves', 'types') + + Returns: + List of validation error messages (empty if valid) + """ + try: + with open(file_path, 'r', encoding='utf-8') as f: + data = json.load(f) + except (json.JSONDecodeError, FileNotFoundError) as e: + return [f"Failed to load file {file_path}: {e}"] + + if data_type == 'pokemon': + return self.validate_pokemon_collection(data) + elif data_type == 'moves': + return self.validate_move_collection(data) + elif data_type == 'types': + return self.validate_type_effectiveness(data) + else: + return [f"Unknown data type: {data_type}"] + + +def save_schemas_to_files(output_dir: Path): + """Save JSON schemas to files for external use.""" + output_dir = Path(output_dir) + output_dir.mkdir(exist_ok=True) + + schemas = { + 'pokemon.schema.json': POKEMON_COLLECTION_SCHEMA, + 'moves.schema.json': MOVE_COLLECTION_SCHEMA, + 'type_effectiveness.schema.json': TYPE_EFFECTIVENESS_SCHEMA + } + + for filename, schema in schemas.items(): + schema_file = output_dir / filename + with open(schema_file, 'w', encoding='utf-8') as f: + json.dump(schema, f, indent=2) + print(f"Saved schema to {schema_file}") + + +if __name__ == "__main__": + # Save schemas to the data directory + save_schemas_to_files(Path("../../data/validation")) diff --git a/tools/data/test_downloader.py b/tools/data/test_downloader.py new file mode 100644 index 0000000..cc781ed --- /dev/null +++ b/tools/data/test_downloader.py @@ -0,0 +1,295 @@ +#!/usr/bin/env python3 +""" +Test script for the Pokemon data downloader. + +This script tests the downloader with small data segments to ensure +everything works correctly before downloading larger datasets. +""" + +import sys +import tempfile +import shutil +from pathlib import Path +import json + +# Add the tools directory to Python path for imports +sys.path.insert(0, str(Path(__file__).parent.parent)) + +from data.pokemon_downloader import PokemonDownloader +from data.schemas import DataValidator +from rich.console import Console +from rich.panel import Panel + +console = Console() + + +def test_small_pokemon_download(): + """Test downloading a small set of Pokemon (first 3).""" + console.print(Panel.fit("🧪 Testing Pokemon Download (IDs 1-3)", style="bold yellow")) + + # Create temporary directory for test output + with tempfile.TemporaryDirectory() as temp_dir: + downloader = PokemonDownloader(output_dir=Path(temp_dir)) + + # Download first 3 Pokemon + pokemon_data = downloader.download_pokemon_batch(1, 3, max_workers=2) + + if not pokemon_data: + console.print("❌ Failed to download any Pokemon") + return False + + console.print(f"✅ Downloaded {len(pokemon_data)} Pokemon:") + for pokemon_id, pokemon in pokemon_data.items(): + console.print(f" - #{pokemon_id}: {pokemon.name.title()} ({', '.join(pokemon.types)})") + + # Save and validate + downloader.save_pokemon_data(pokemon_data, "test_pokemon.json") + + # Check the saved file + saved_file = Path(temp_dir) / "test_pokemon.json" + if saved_file.exists(): + with open(saved_file) as f: + saved_data = json.load(f) + console.print(f"✅ Successfully saved {len(saved_data)} Pokemon to file") + + # Show first Pokemon details + first_pokemon = list(saved_data.values())[0] + console.print(f"📊 Sample data for {first_pokemon['name']}:") + console.print(f" - Types: {first_pokemon['types']}") + console.print(f" - Base HP: {first_pokemon['base_stats']['hp']}") + console.print(f" - Abilities: {first_pokemon['abilities'][:2]}...") # Show first 2 + console.print(f" - Move count: {len(first_pokemon['moves'])}") + + return True + + +def test_moves_download(): + """Test downloading a small set of moves.""" + console.print(Panel.fit("🧪 Testing Moves Download (IDs 1-5)", style="bold yellow")) + + with tempfile.TemporaryDirectory() as temp_dir: + downloader = PokemonDownloader(output_dir=Path(temp_dir)) + + # Download first 5 moves + move_ids = [1, 2, 3, 4, 5] # Pound, Karate Chop, Double Slap, Comet Punch, Mega Punch + moves_data = downloader.download_moves_batch(move_ids, max_workers=2) + + if not moves_data: + console.print("❌ Failed to download any moves") + return False + + console.print(f"✅ Downloaded {len(moves_data)} moves:") + for move_id, move in moves_data.items(): + power_str = f"{move.power} power" if move.power else "no power" + console.print(f" - #{move_id}: {move.name.title()} ({move.type}, {power_str})") + + # Save and validate + downloader.save_moves_data(moves_data, "test_moves.json") + + # Check the saved file + saved_file = Path(temp_dir) / "test_moves.json" + if saved_file.exists(): + with open(saved_file) as f: + saved_data = json.load(f) + console.print(f"✅ Successfully saved {len(saved_data)} moves to file") + + # Show move details + for move_data in list(saved_data.values())[:2]: # Show first 2 moves + console.print(f"📊 {move_data['name'].title()}:") + console.print(f" - Type: {move_data['type']}") + console.print(f" - Power: {move_data['power']}") + console.print(f" - Accuracy: {move_data['accuracy']}") + console.print(f" - PP: {move_data['pp']}") + + return True + + +def test_type_effectiveness(): + """Test downloading type effectiveness data.""" + console.print(Panel.fit("🧪 Testing Type Effectiveness Download", style="bold yellow")) + + with tempfile.TemporaryDirectory() as temp_dir: + downloader = PokemonDownloader(output_dir=Path(temp_dir)) + + # Download type effectiveness + effectiveness_data = downloader.download_type_effectiveness() + + if not effectiveness_data: + console.print("❌ Failed to download type effectiveness data") + return False + + console.print(f"✅ Downloaded {len(effectiveness_data)} type effectiveness entries") + + # Show some examples + console.print("📊 Sample type effectiveness entries:") + for entry in effectiveness_data[:5]: + factor_str = {0.0: "no effect", 0.5: "not very effective", 2.0: "super effective"} + console.print(f" - {entry.attacking_type} vs {entry.defending_type}: {factor_str.get(entry.damage_factor, str(entry.damage_factor))}") + + # Save and validate + downloader.save_type_effectiveness(effectiveness_data, "test_types.json") + + # Check the saved file + saved_file = Path(temp_dir) / "test_types.json" + if saved_file.exists(): + with open(saved_file) as f: + saved_data = json.load(f) + console.print(f"✅ Successfully saved {len(saved_data)} type effectiveness entries to file") + + return True + + +def test_validation(): + """Test the data validation system.""" + console.print(Panel.fit("🧪 Testing Data Validation", style="bold yellow")) + + validator = DataValidator() + + # Test valid Pokemon data + valid_pokemon = { + "1": { + "id": 1, + "name": "bulbasaur", + "types": ["grass", "poison"], + "base_stats": { + "hp": 45, + "attack": 49, + "defense": 49, + "special_attack": 65, + "special_defense": 65, + "speed": 45 + }, + "abilities": ["overgrow", "chlorophyll"], + "moves": [1, 2, 3, 4], + "weight": 69, + "height": 7, + "base_experience": 64 + } + } + + errors = validator.validate_pokemon_collection(valid_pokemon) + if errors: + console.print(f"❌ Validation failed for valid data: {errors}") + return False + else: + console.print("✅ Valid Pokemon data passed validation") + + # Test invalid Pokemon data + invalid_pokemon = { + "1": { + "id": 1, + "name": "bulbasaur", + "types": ["grass", "invalid_type"], # Invalid type + "base_stats": { + "hp": 45, + "attack": 49, + "defense": 49, + "special_attack": 65, + "special_defense": 65, + "speed": 45 + }, + "abilities": ["overgrow"], + "moves": [1, 2, 3, 4], + "weight": 69, + "height": 7, + "base_experience": 64 + } + } + + errors = validator.validate_pokemon_collection(invalid_pokemon) + if errors: + console.print(f"✅ Invalid Pokemon data correctly failed validation: {len(errors)} errors") + else: + console.print("❌ Invalid data should have failed validation") + return False + + return True + + +def test_integrated_download(): + """Test downloading Pokemon with their moves in an integrated fashion.""" + console.print(Panel.fit("🧪 Testing Integrated Pokemon + Moves Download", style="bold yellow")) + + with tempfile.TemporaryDirectory() as temp_dir: + downloader = PokemonDownloader(output_dir=Path(temp_dir)) + + # Download a single Pokemon (Pikachu) + pokemon_data = downloader.download_pokemon_batch(25, 25, max_workers=1) + + if not pokemon_data: + console.print("❌ Failed to download Pikachu") + return False + + pikachu = pokemon_data[25] + console.print(f"✅ Downloaded {pikachu.name.title()}") + console.print(f" - Types: {pikachu.types}") + console.print(f" - Base stats total: {sum(pikachu.base_stats.__dict__.values())}") + console.print(f" - Can learn {len(pikachu.moves)} moves") + + # Download first 10 moves that Pikachu can learn + pikachu_moves = pikachu.moves[:10] + moves_data = downloader.download_moves_batch(pikachu_moves, max_workers=3) + + if moves_data: + console.print(f"✅ Downloaded {len(moves_data)} of Pikachu's moves:") + for move_id, move in list(moves_data.items())[:5]: + console.print(f" - {move.name.title()} ({move.type} type)") + + # Save both datasets + downloader.save_pokemon_data(pokemon_data, "pikachu.json") + downloader.save_moves_data(moves_data, "pikachu_moves.json") + + return True + + +def run_all_tests(): + """Run all tests.""" + console.print(Panel.fit( + "🚀 Pokemon Data Downloader Test Suite", + style="bold green" + )) + + tests = [ + ("Pokemon Download", test_small_pokemon_download), + ("Moves Download", test_moves_download), + ("Type Effectiveness", test_type_effectiveness), + ("Data Validation", test_validation), + ("Integrated Download", test_integrated_download), + ] + + results = [] + + for test_name, test_func in tests: + console.print(f"\n{'='*50}") + try: + result = test_func() + results.append((test_name, result)) + status = "✅ PASSED" if result else "❌ FAILED" + console.print(f"{test_name}: {status}") + except Exception as e: + results.append((test_name, False)) + console.print(f"{test_name}: ❌ ERROR - {e}") + + # Summary + console.print(f"\n{'='*50}") + console.print("TEST SUMMARY:") + passed = sum(1 for _, result in results if result) + total = len(results) + + for test_name, result in results: + status = "✅" if result else "❌" + console.print(f" {status} {test_name}") + + console.print(f"\nOverall: {passed}/{total} tests passed") + + if passed == total: + console.print(Panel.fit("🎉 All tests passed! The downloader is ready to use.", style="bold green")) + else: + console.print(Panel.fit("⚠️ Some tests failed. Please check the errors above.", style="bold red")) + + return passed == total + + +if __name__ == "__main__": + success = run_all_tests() + sys.exit(0 if success else 1) diff --git a/tools/requirements.txt b/tools/requirements.txt index 065bc4d..b726e3b 100644 --- a/tools/requirements.txt +++ b/tools/requirements.txt @@ -2,6 +2,7 @@ requests>=2.28.0 # HTTP requests for data scraping beautifulsoup4>=4.11.0 # HTML parsing for web scraping lxml>=4.9.0 # XML/HTML parser +pokebase==1.4.1 # Pokemon API wrapper # Data processing pandas>=1.5.0 # Data manipulation and analysis