Files
pokemon-battle-engine/tools/data/pokemon_downloader.py
2025-08-20 08:30:07 +09:00

484 lines
18 KiB
Python
Executable File

#!/usr/bin/env python3
"""
Pokemon Data Downloader
This tool downloads generation-specific Pokemon data from the PokeAPI (https://pokeapi.co/)
and organizes it into the proper directory structure for the Pokemon Battle Engine.
Usage:
python pokemon_downloader.py --generations 1,2,3 --data-types types,pokemon,moves
python pokemon_downloader.py --all-generations --all-data-types
python pokemon_downloader.py --help
Data Structure:
data/
├── types/
│ ├── generation-i.json
│ ├── generation-ii.json
│ └── ...
├── pokemon/
│ ├── generation-i.json
│ ├── generation-ii.json
│ └── ...
└── moves/
├── generation-i.json
├── generation-ii.json
└── ...
"""
import argparse
import json
import os
import sys
import time
from pathlib import Path
from typing import Dict, List, Optional, Set
import requests
from tqdm import tqdm
class PokemonDataDownloader:
    """Downloads and processes Pokemon data from PokeAPI."""

    BASE_URL = "https://pokeapi.co/api/v2"

    # Generation mappings (approximate - some Pokemon/moves span generations).
    # Values are inclusive (first, last) National Dex ID ranges; dict order
    # is the canonical generation order used by _compare_generations().
    GENERATION_RANGES = {
        "generation-i": (1, 151),       # Kanto
        "generation-ii": (152, 251),    # Johto
        "generation-iii": (252, 386),   # Hoenn
        "generation-iv": (387, 493),    # Sinnoh
        "generation-v": (494, 649),     # Unova
        "generation-vi": (650, 721),    # Kalos
        "generation-vii": (722, 809),   # Alola
        "generation-viii": (810, 905),  # Galar
        "generation-ix": (906, 1025),   # Paldea
    }

    def __init__(self, base_dir: str = "data"):
        """Initialize the downloader with base data directory.

        Args:
            base_dir: Root directory for the downloaded data tree.
        """
        self.base_dir = Path(base_dir)
        self.session = requests.Session()
        self.session.headers.update({
            'User-Agent': 'Pokemon-Battle-Engine-Data-Downloader/1.0'
        })
        # Create base directories up front so later file writes never fail
        # on a missing parent.
        for subdir in ('types', 'pokemon', 'moves'):
            (self.base_dir / subdir).mkdir(parents=True, exist_ok=True)

    def _file_exists_and_valid(self, file_path: Path, min_items: int = 1) -> bool:
        """Check if a file exists and contains at least min_items JSON items.

        Acts as a cheap cache probe so already-downloaded data files are
        not re-fetched. Non-container JSON counts as valid regardless of
        min_items.
        """
        if not file_path.exists():
            return False
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                data = json.load(f)
        except (json.JSONDecodeError, KeyError, TypeError):
            return False
        if isinstance(data, (dict, list)):
            return len(data) >= min_items
        return True  # File exists and is valid JSON

    def get_json(self, url: str, max_retries: int = 3) -> Optional[Dict]:
        """Fetch JSON data from URL with retry logic.

        Returns the decoded JSON, or None (after printing a message) once
        all attempts have failed.
        """
        for attempt in range(max_retries):
            try:
                response = self.session.get(url, timeout=30)
                response.raise_for_status()
                return response.json()
            except requests.RequestException as e:
                if attempt == max_retries - 1:
                    print(f"Failed to fetch {url}: {e}")
                    return None
                time.sleep(2 ** attempt)  # Exponential backoff: 1s, 2s, ...
        return None

    def get_all_generations(self) -> List[str]:
        """Get the list of all known Pokemon generations in canonical order.

        Falls back to the static GENERATION_RANGES keys when the API is
        unreachable.
        """
        data = self.get_json(f"{self.BASE_URL}/generation/")
        if not data:
            return list(self.GENERATION_RANGES.keys())
        known = [gen['name'] for gen in data['results']
                 if gen['name'] in self.GENERATION_RANGES]
        # BUGFIX: order by canonical generation sequence, not alphabetically.
        # A plain sorted() placed "generation-ix" before "generation-v".
        order = list(self.GENERATION_RANGES.keys())
        return sorted(known, key=order.index)

    def download_type_effectiveness(self, generation: str) -> bool:
        """Download type effectiveness chart for a specific generation.

        NOTE(review): the /type/ endpoint returns the *current* damage
        relations; a truly generation-accurate chart would need the API's
        past_damage_relations. Here the generation only validates the name
        and selects the output file — confirm this is intended.
        """
        output_file = self.base_dir / 'types' / f'{generation}.json'
        # Reuse an existing chart; a complete one has at least 15-18 types.
        if self._file_exists_and_valid(output_file, min_items=15):
            print(f"Using existing type effectiveness data for {generation}")
            return True
        print(f"Downloading type effectiveness for {generation}...")
        # Get generation data to find the version groups
        gen_data = self.get_json(f"{self.BASE_URL}/generation/{generation}/")
        if not gen_data:
            print(f"Failed to get generation data for {generation}")
            return False
        # Sanity check only: a real generation has at least one version group.
        version_groups = gen_data.get('version_groups', [])
        if not version_groups:
            print(f"No version groups found for {generation}")
            return False
        types_data = self.get_json(f"{self.BASE_URL}/type/")
        if not types_data:
            print("Failed to get types list")
            return False
        type_effectiveness = {}
        for type_info in tqdm(types_data['results'], desc="Processing types"):
            type_name = type_info['name']
            # Skip special types that don't have damage relations
            if type_name in ('unknown', 'shadow'):
                continue
            type_data = self.get_json(f"{self.BASE_URL}/type/{type_name}/")
            if not type_data:
                print(f"Failed to get data for type {type_name}")
                continue
            damage_relations = type_data.get('damage_relations', {})
            # Flatten each relation list down to bare type names.
            type_effectiveness[type_name] = {
                relation: [t['name'] for t in damage_relations.get(relation, [])]
                for relation in (
                    'double_damage_from', 'double_damage_to',
                    'half_damage_from', 'half_damage_to',
                    'no_damage_from', 'no_damage_to',
                )
            }
        with open(output_file, 'w', encoding='utf-8') as f:
            json.dump(type_effectiveness, f, indent=2, ensure_ascii=False)
        print(f"Saved type effectiveness data to {output_file}")
        return True

    def download_pokemon_data(self, generation: str) -> bool:
        """Download Pokemon data for a specific generation.

        Fetches every Pokemon in the generation's National Dex range plus
        its species flags, and writes the list to pokemon/<generation>.json.
        """
        output_file = self.base_dir / 'pokemon' / f'{generation}.json'
        # Validate the generation before announcing or checking anything.
        if generation not in self.GENERATION_RANGES:
            print(f"Unknown generation: {generation}")
            return False
        min_id, max_id = self.GENERATION_RANGES[generation]
        expected_pokemon = max_id - min_id + 1
        # Reuse an existing file holding at least ~80% of the expected
        # Pokemon (allows for a few individual download failures).
        if self._file_exists_and_valid(output_file, min_items=int(expected_pokemon * 0.8)):
            print(f"Using existing Pokemon data for {generation}")
            return True
        print(f"Downloading Pokemon data for {generation}...")
        pokemon_list = []
        for pokemon_id in tqdm(range(min_id, max_id + 1),
                               desc=f"Downloading {generation} Pokemon"):
            pokemon_data = self.get_json(f"{self.BASE_URL}/pokemon/{pokemon_id}/")
            if not pokemon_data:
                print(f"Failed to get Pokemon {pokemon_id}")
                continue
            # Keep only the fields the battle engine needs.
            pokemon_info = {
                'id': pokemon_data['id'],
                'name': pokemon_data['name'],
                'height': pokemon_data['height'],
                'weight': pokemon_data['weight'],
                'base_experience': pokemon_data.get('base_experience', 0),
                'types': [t['type']['name'] for t in pokemon_data['types']],
                'stats': {
                    stat['stat']['name']: stat['base_stat']
                    for stat in pokemon_data['stats']
                },
                'abilities': [
                    {
                        'name': ability['ability']['name'],
                        'is_hidden': ability['is_hidden'],
                        'slot': ability['slot'],
                    }
                    for ability in pokemon_data['abilities']
                ],
            }
            # Species data adds legendary/mythical/baby flags; omitted
            # silently when unavailable (get_json already printed the error).
            species_data = self.get_json(pokemon_data['species']['url'])
            if species_data:
                pokemon_info['species'] = {
                    'name': species_data['name'],
                    'generation': species_data['generation']['name'],
                    'is_legendary': species_data['is_legendary'],
                    'is_mythical': species_data['is_mythical'],
                    'is_baby': species_data.get('is_baby', False),
                }
            pokemon_list.append(pokemon_info)
        with open(output_file, 'w', encoding='utf-8') as f:
            json.dump(pokemon_list, f, indent=2, ensure_ascii=False)
        print(f"Saved {len(pokemon_list)} Pokemon to {output_file}")
        return True

    def download_all_moves_data(self) -> bool:
        """Download all moves data and save to all_moves.json.

        The per-generation move files are filtered from this cache, so the
        full move list is only downloaded once.
        """
        all_moves_file = self.base_dir / 'all_moves.json'
        # Reuse an existing cache when it looks complete.
        if all_moves_file.exists():
            try:
                with open(all_moves_file, 'r', encoding='utf-8') as f:
                    existing_data = json.load(f)
                if len(existing_data) > 800:  # Should have at least 800+ moves
                    print(f"Using existing all_moves.json with {len(existing_data)} moves")
                    return True
                print(f"Existing all_moves.json only has {len(existing_data)} moves, re-downloading...")
            except (json.JSONDecodeError, KeyError):
                print("Existing all_moves.json is corrupted, re-downloading...")
        print("Downloading all moves data...")
        # One request with a generous limit covers the full move list.
        moves_data = self.get_json(f"{self.BASE_URL}/move/?limit=2000")
        if not moves_data:
            print("Failed to get moves list")
            return False
        all_moves = {}
        # BUGFIX: the original reused the loop variable name for the built
        # record; use distinct names for the API reference and the result.
        for move_ref in tqdm(moves_data['results'], desc="Downloading all moves"):
            move_data = self.get_json(move_ref['url'])
            if not move_data:
                continue
            # Some moves have no effect entries at all; fall back to None.
            effect_entries = move_data['effect_entries']
            all_moves[move_data['id']] = {
                'id': move_data['id'],
                'name': move_data['name'],
                'generation': move_data['generation']['name'],
                'power': move_data['power'],
                'pp': move_data['pp'],
                'accuracy': move_data['accuracy'],
                'priority': move_data['priority'],
                'damage_class': move_data['damage_class']['name'],
                'type': move_data['type']['name'],
                'target': move_data['target']['name'],
                'effect_chance': move_data.get('effect_chance'),
                'effect': effect_entries[0]['effect'] if effect_entries else None,
                'short_effect': effect_entries[0]['short_effect'] if effect_entries else None,
                'meta': move_data.get('meta', {}),
            }
        with open(all_moves_file, 'w', encoding='utf-8') as f:
            json.dump(all_moves, f, indent=2, ensure_ascii=False)
        print(f"Saved {len(all_moves)} moves to {all_moves_file}")
        return True

    def download_moves_data(self, generation: str) -> bool:
        """Download moves data for a specific generation.

        A move is kept when it was introduced in the target generation or
        any earlier one.
        """
        output_file = self.base_dir / 'moves' / f'{generation}.json'
        # Generation I should have ~165 moves; later generations have more.
        min_moves = 100 if generation == "generation-i" else 200
        if self._file_exists_and_valid(output_file, min_items=min_moves):
            print(f"Using existing moves data for {generation}")
            return True
        print(f"Downloading moves data for {generation}...")
        # First ensure we have all moves data
        if not self.download_all_moves_data():
            return False
        all_moves_file = self.base_dir / 'all_moves.json'
        try:
            with open(all_moves_file, 'r', encoding='utf-8') as f:
                all_moves = json.load(f)
        except (FileNotFoundError, json.JSONDecodeError):
            print("Failed to load all_moves.json")
            return False
        # Keep moves introduced in this generation or earlier (the key is
        # the move id, which we don't need here).
        moves_list = [
            move_data for move_data in all_moves.values()
            if self._compare_generations(move_data['generation'], generation) <= 0
        ]
        with open(output_file, 'w', encoding='utf-8') as f:
            json.dump(moves_list, f, indent=2, ensure_ascii=False)
        print(f"Saved {len(moves_list)} moves to {output_file}")
        return True

    def _compare_generations(self, gen1: str, gen2: str) -> int:
        """Compare two generations by canonical order.

        Returns -1 if gen1 < gen2, 0 if equal, 1 if gen1 > gen2. Unknown
        names compare equal (0) so unexpected API values are not filtered
        out by callers.
        """
        generations = list(self.GENERATION_RANGES.keys())
        try:
            idx1 = generations.index(gen1)
            idx2 = generations.index(gen2)
        except ValueError:
            return 0
        return (idx1 > idx2) - (idx1 < idx2)

    def download_all_data(self, generations: List[str], data_types: List[str]) -> bool:
        """Download all specified data types for all specified generations.

        Returns True only when every download succeeded; individual
        failures are reported but do not stop the remaining downloads.
        """
        # Dispatch table keeps the inner loop flat and makes adding a new
        # data type a one-line change.
        downloaders = {
            'types': self.download_type_effectiveness,
            'pokemon': self.download_pokemon_data,
            'moves': self.download_moves_data,
        }
        success = True
        for generation in generations:
            print(f"\n{'='*50}")
            print(f"Processing {generation}")
            print(f"{'='*50}")
            for data_type in data_types:
                downloader = downloaders.get(data_type)
                if downloader is None:
                    print(f"Unknown data type: {data_type}")
                    success = False
                    continue
                try:
                    if not downloader(generation):
                        success = False
                except Exception as e:
                    # Keep going so one bad generation/type doesn't abort
                    # the whole run; the final status reflects the failure.
                    print(f"Error downloading {data_type} for {generation}: {e}")
                    success = False
        return success
def main():
    """Command-line entry point: parse arguments, then run the downloads."""

    def csv_list(raw):
        # Turn a comma-separated argument into a list of trimmed items.
        return [item.strip() for item in raw.split(',')]

    parser = argparse.ArgumentParser(
        description="Download Pokemon data from PokeAPI",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
python pokemon_downloader.py --generations generation-i --data-types types
python pokemon_downloader.py --all-generations --all-data-types
python pokemon_downloader.py --generations generation-i,generation-ii --data-types pokemon,moves
"""
    )
    parser.add_argument(
        '--generations',
        type=csv_list,
        help='Comma-separated list of generations to download (e.g., generation-i,generation-ii)'
    )
    parser.add_argument(
        '--all-generations',
        action='store_true',
        help='Download data for all generations'
    )
    parser.add_argument(
        '--data-types',
        type=csv_list,
        help='Comma-separated list of data types to download (types,pokemon,moves)'
    )
    parser.add_argument(
        '--all-data-types',
        action='store_true',
        help='Download all data types'
    )
    parser.add_argument(
        '--output-dir',
        default='data',
        help='Output directory for downloaded data (default: data)'
    )
    args = parser.parse_args()

    # Guard clauses: one generation selector and one data-type selector
    # are required.
    if not (args.all_generations or args.generations):
        print("Error: Must specify either --generations or --all-generations")
        sys.exit(1)
    if not (args.all_data_types or args.data_types):
        print("Error: Must specify either --data-types or --all-data-types")
        sys.exit(1)

    downloader = PokemonDataDownloader(args.output_dir)

    # Resolve the explicit lists from the --all-* shortcuts.
    generations = downloader.get_all_generations() if args.all_generations else args.generations
    data_types = ['types', 'pokemon', 'moves'] if args.all_data_types else args.data_types

    print(f"Downloading {data_types} data for generations: {generations}")
    print(f"Output directory: {args.output_dir}")

    banner = '=' * 50
    if downloader.download_all_data(generations, data_types):
        print(f"\n{banner}")
        print("Download completed successfully!")
        print(f"Data saved to: {args.output_dir}")
        print(f"{banner}")
    else:
        print(f"\n{banner}")
        print("Download completed with errors. Check the output above for details.")
        print(f"{banner}")
        sys.exit(1)
# Run the CLI only when executed as a script, not when imported as a module.
if __name__ == '__main__':
    main()