#!/usr/bin/env python3
"""
Pokemon Data Downloader

This tool downloads generation-specific Pokemon data from the PokeAPI (https://pokeapi.co/)
and organizes it into the proper directory structure for the Pokemon Battle Engine.

Usage:
    python pokemon_downloader.py --generations generation-i,generation-ii --data-types types,pokemon,moves
    python pokemon_downloader.py --all-generations --all-data-types
    python pokemon_downloader.py --help

Data Structure:
    data/
    ├── types/
    │   ├── generation-i.json
    │   ├── generation-ii.json
    │   └── ...
    ├── pokemon/
    │   ├── generation-i.json
    │   ├── generation-ii.json
    │   └── ...
    └── moves/
        ├── generation-i.json
        ├── generation-ii.json
        └── ...
"""

import argparse
import json
import os
import sys
import time
from pathlib import Path
from typing import Dict, List, Optional, Set

import requests
from tqdm import tqdm


class PokemonDataDownloader:
|
|
"""Downloads and processes Pokemon data from PokeAPI."""
|
|
|
|
BASE_URL = "https://pokeapi.co/api/v2"
|
|
|
|
# Generation mappings (approximate - some Pokemon/moves span generations)
|
|
GENERATION_RANGES = {
|
|
"generation-i": (1, 151), # Kanto
|
|
"generation-ii": (152, 251), # Johto
|
|
"generation-iii": (252, 386), # Hoenn
|
|
"generation-iv": (387, 493), # Sinnoh
|
|
"generation-v": (494, 649), # Unova
|
|
"generation-vi": (650, 721), # Kalos
|
|
"generation-vii": (722, 809), # Alola
|
|
"generation-viii": (810, 905), # Galar
|
|
"generation-ix": (906, 1025), # Paldea
|
|
}
|
|
|
|
def __init__(self, base_dir: str = "data"):
|
|
"""Initialize the downloader with base data directory."""
|
|
self.base_dir = Path(base_dir)
|
|
self.session = requests.Session()
|
|
self.session.headers.update({
|
|
'User-Agent': 'Pokemon-Battle-Engine-Data-Downloader/1.0'
|
|
})
|
|
|
|
# Create base directories
|
|
for subdir in ['types', 'pokemon', 'moves']:
|
|
(self.base_dir / subdir).mkdir(parents=True, exist_ok=True)
|
|
|
|
def _file_exists_and_valid(self, file_path: Path, min_items: int = 1) -> bool:
|
|
"""Check if a file exists and contains at least the minimum number of items."""
|
|
if not file_path.exists():
|
|
return False
|
|
|
|
try:
|
|
with open(file_path, 'r', encoding='utf-8') as f:
|
|
data = json.load(f)
|
|
|
|
# Handle different data structures
|
|
if isinstance(data, dict):
|
|
return len(data) >= min_items
|
|
elif isinstance(data, list):
|
|
return len(data) >= min_items
|
|
else:
|
|
return True # File exists and is valid JSON
|
|
except (json.JSONDecodeError, KeyError, TypeError):
|
|
return False
|
|
|
|
def get_json(self, url: str, max_retries: int = 3) -> Optional[Dict]:
|
|
"""Fetch JSON data from URL with retry logic."""
|
|
for attempt in range(max_retries):
|
|
try:
|
|
response = self.session.get(url, timeout=30)
|
|
response.raise_for_status()
|
|
return response.json()
|
|
except requests.RequestException as e:
|
|
if attempt == max_retries - 1:
|
|
print(f"Failed to fetch {url}: {e}")
|
|
return None
|
|
time.sleep(2 ** attempt) # Exponential backoff
|
|
return None
|
|
|
|
def get_all_generations(self) -> List[str]:
|
|
"""Get list of all Pokemon generations."""
|
|
data = self.get_json(f"{self.BASE_URL}/generation/")
|
|
if not data:
|
|
return list(self.GENERATION_RANGES.keys())
|
|
|
|
generations = []
|
|
for gen in data['results']:
|
|
gen_name = gen['name']
|
|
if gen_name in self.GENERATION_RANGES:
|
|
generations.append(gen_name)
|
|
|
|
return sorted(generations)
|
|
|
|
def download_type_effectiveness(self, generation: str) -> bool:
|
|
"""Download type effectiveness chart for a specific generation."""
|
|
output_file = self.base_dir / 'types' / f'{generation}.json'
|
|
|
|
# Check if file already exists and has enough types (should have at least 15-18 types)
|
|
if self._file_exists_and_valid(output_file, min_items=15):
|
|
print(f"Using existing type effectiveness data for {generation}")
|
|
return True
|
|
|
|
print(f"Downloading type effectiveness for {generation}...")
|
|
|
|
# Get generation data to find the version groups
|
|
gen_data = self.get_json(f"{self.BASE_URL}/generation/{generation}/")
|
|
if not gen_data:
|
|
print(f"Failed to get generation data for {generation}")
|
|
return False
|
|
|
|
# Get the first version group for this generation
|
|
version_groups = gen_data.get('version_groups', [])
|
|
if not version_groups:
|
|
print(f"No version groups found for {generation}")
|
|
return False
|
|
|
|
# Use the first version group to get types
|
|
types_data = self.get_json(f"{self.BASE_URL}/type/")
|
|
if not types_data:
|
|
print("Failed to get types list")
|
|
return False
|
|
|
|
type_effectiveness = {}
|
|
|
|
# For each type, get its damage relations
|
|
for type_info in tqdm(types_data['results'], desc="Processing types"):
|
|
type_name = type_info['name']
|
|
|
|
# Skip special types that don't have damage relations
|
|
if type_name in ['unknown', 'shadow']:
|
|
continue
|
|
|
|
type_data = self.get_json(f"{self.BASE_URL}/type/{type_name}/")
|
|
if not type_data:
|
|
print(f"Failed to get data for type {type_name}")
|
|
continue
|
|
|
|
# Extract damage relations
|
|
damage_relations = type_data.get('damage_relations', {})
|
|
|
|
type_effectiveness[type_name] = {
|
|
'double_damage_from': [t['name'] for t in damage_relations.get('double_damage_from', [])],
|
|
'double_damage_to': [t['name'] for t in damage_relations.get('double_damage_to', [])],
|
|
'half_damage_from': [t['name'] for t in damage_relations.get('half_damage_from', [])],
|
|
'half_damage_to': [t['name'] for t in damage_relations.get('half_damage_to', [])],
|
|
'no_damage_from': [t['name'] for t in damage_relations.get('no_damage_from', [])],
|
|
'no_damage_to': [t['name'] for t in damage_relations.get('no_damage_to', [])],
|
|
}
|
|
|
|
# Save the data
|
|
with open(output_file, 'w', encoding='utf-8') as f:
|
|
json.dump(type_effectiveness, f, indent=2, ensure_ascii=False)
|
|
|
|
print(f"Saved type effectiveness data to {output_file}")
|
|
return True
|
|
|
|
def download_pokemon_data(self, generation: str) -> bool:
|
|
"""Download Pokemon data for a specific generation."""
|
|
output_file = self.base_dir / 'pokemon' / f'{generation}.json'
|
|
|
|
# Check if file already exists and has expected number of Pokemon
|
|
if generation in self.GENERATION_RANGES:
|
|
min_id, max_id = self.GENERATION_RANGES[generation]
|
|
expected_pokemon = max_id - min_id + 1
|
|
|
|
if self._file_exists_and_valid(output_file, min_items=expected_pokemon * 0.8): # Allow for some missing Pokemon
|
|
print(f"Using existing Pokemon data for {generation}")
|
|
return True
|
|
|
|
print(f"Downloading Pokemon data for {generation}...")
|
|
|
|
if generation not in self.GENERATION_RANGES:
|
|
print(f"Unknown generation: {generation}")
|
|
return False
|
|
|
|
min_id, max_id = self.GENERATION_RANGES[generation]
|
|
pokemon_list = []
|
|
|
|
# Download Pokemon in batches
|
|
for pokemon_id in tqdm(range(min_id, max_id + 1), desc=f"Downloading {generation} Pokemon"):
|
|
pokemon_data = self.get_json(f"{self.BASE_URL}/pokemon/{pokemon_id}/")
|
|
if not pokemon_data:
|
|
print(f"Failed to get Pokemon {pokemon_id}")
|
|
continue
|
|
|
|
# Extract relevant Pokemon data
|
|
pokemon_info = {
|
|
'id': pokemon_data['id'],
|
|
'name': pokemon_data['name'],
|
|
'height': pokemon_data['height'],
|
|
'weight': pokemon_data['weight'],
|
|
'base_experience': pokemon_data.get('base_experience', 0),
|
|
'types': [t['type']['name'] for t in pokemon_data['types']],
|
|
'stats': {
|
|
stat['stat']['name']: stat['base_stat']
|
|
for stat in pokemon_data['stats']
|
|
},
|
|
'abilities': [
|
|
{
|
|
'name': ability['ability']['name'],
|
|
'is_hidden': ability['is_hidden'],
|
|
'slot': ability['slot']
|
|
}
|
|
for ability in pokemon_data['abilities']
|
|
]
|
|
}
|
|
|
|
# Get species data for additional information
|
|
species_data = self.get_json(pokemon_data['species']['url'])
|
|
if species_data:
|
|
pokemon_info['species'] = {
|
|
'name': species_data['name'],
|
|
'generation': species_data['generation']['name'],
|
|
'is_legendary': species_data['is_legendary'],
|
|
'is_mythical': species_data['is_mythical'],
|
|
'is_baby': species_data.get('is_baby', False),
|
|
}
|
|
|
|
pokemon_list.append(pokemon_info)
|
|
|
|
# Save the data
|
|
with open(output_file, 'w', encoding='utf-8') as f:
|
|
json.dump(pokemon_list, f, indent=2, ensure_ascii=False)
|
|
|
|
print(f"Saved {len(pokemon_list)} Pokemon to {output_file}")
|
|
return True
|
|
|
|
def download_all_moves_data(self) -> bool:
|
|
"""Download all moves data and save to all_moves.json."""
|
|
all_moves_file = self.base_dir / 'all_moves.json'
|
|
|
|
# Check if all moves file already exists and has reasonable size
|
|
if all_moves_file.exists():
|
|
try:
|
|
with open(all_moves_file, 'r', encoding='utf-8') as f:
|
|
existing_data = json.load(f)
|
|
if len(existing_data) > 800: # Should have at least 800+ moves
|
|
print(f"Using existing all_moves.json with {len(existing_data)} moves")
|
|
return True
|
|
else:
|
|
print(f"Existing all_moves.json only has {len(existing_data)} moves, re-downloading...")
|
|
except (json.JSONDecodeError, KeyError):
|
|
print("Existing all_moves.json is corrupted, re-downloading...")
|
|
|
|
print("Downloading all moves data...")
|
|
|
|
# Get all moves
|
|
moves_data = self.get_json(f"{self.BASE_URL}/move/?limit=2000")
|
|
if not moves_data:
|
|
print("Failed to get moves list")
|
|
return False
|
|
|
|
all_moves = {}
|
|
|
|
# Download move details
|
|
for move_info in tqdm(moves_data['results'], desc="Downloading all moves"):
|
|
move_data = self.get_json(move_info['url'])
|
|
if not move_data:
|
|
continue
|
|
|
|
move_info = {
|
|
'id': move_data['id'],
|
|
'name': move_data['name'],
|
|
'generation': move_data['generation']['name'],
|
|
'power': move_data['power'],
|
|
'pp': move_data['pp'],
|
|
'accuracy': move_data['accuracy'],
|
|
'priority': move_data['priority'],
|
|
'damage_class': move_data['damage_class']['name'],
|
|
'type': move_data['type']['name'],
|
|
'target': move_data['target']['name'],
|
|
'effect_chance': move_data.get('effect_chance'),
|
|
'effect': move_data['effect_entries'][0]['effect'] if move_data['effect_entries'] else None,
|
|
'short_effect': move_data['effect_entries'][0]['short_effect'] if move_data['effect_entries'] else None,
|
|
'meta': move_data.get('meta', {}),
|
|
}
|
|
|
|
all_moves[move_data['id']] = move_info
|
|
|
|
# Save all moves data
|
|
with open(all_moves_file, 'w', encoding='utf-8') as f:
|
|
json.dump(all_moves, f, indent=2, ensure_ascii=False)
|
|
|
|
print(f"Saved {len(all_moves)} moves to {all_moves_file}")
|
|
return True
|
|
|
|
def download_moves_data(self, generation: str) -> bool:
|
|
"""Download moves data for a specific generation."""
|
|
output_file = self.base_dir / 'moves' / f'{generation}.json'
|
|
|
|
# Check if file already exists and has a reasonable number of moves
|
|
# Generation I should have ~165 moves, later generations should have more
|
|
min_moves = 100 if generation == "generation-i" else 200
|
|
if self._file_exists_and_valid(output_file, min_items=min_moves):
|
|
print(f"Using existing moves data for {generation}")
|
|
return True
|
|
|
|
print(f"Downloading moves data for {generation}...")
|
|
|
|
# First ensure we have all moves data
|
|
if not self.download_all_moves_data():
|
|
return False
|
|
|
|
# Load all moves data
|
|
all_moves_file = self.base_dir / 'all_moves.json'
|
|
try:
|
|
with open(all_moves_file, 'r', encoding='utf-8') as f:
|
|
all_moves = json.load(f)
|
|
except (FileNotFoundError, json.JSONDecodeError):
|
|
print("Failed to load all_moves.json")
|
|
return False
|
|
|
|
moves_list = []
|
|
|
|
# Filter moves by generation
|
|
for move_id, move_data in all_moves.items():
|
|
# Check if this move was introduced in the target generation or earlier
|
|
move_generation = move_data['generation']
|
|
if self._compare_generations(move_generation, generation) > 0:
|
|
continue # Move is from a later generation
|
|
|
|
moves_list.append(move_data)
|
|
|
|
# Save the filtered data
|
|
with open(output_file, 'w', encoding='utf-8') as f:
|
|
json.dump(moves_list, f, indent=2, ensure_ascii=False)
|
|
|
|
print(f"Saved {len(moves_list)} moves to {output_file}")
|
|
return True
|
|
|
|
def _compare_generations(self, gen1: str, gen2: str) -> int:
|
|
"""Compare two generations. Returns -1 if gen1 < gen2, 0 if equal, 1 if gen1 > gen2."""
|
|
generations = list(self.GENERATION_RANGES.keys())
|
|
try:
|
|
idx1 = generations.index(gen1)
|
|
idx2 = generations.index(gen2)
|
|
return (idx1 > idx2) - (idx1 < idx2)
|
|
except ValueError:
|
|
return 0
|
|
|
|
def download_all_data(self, generations: List[str], data_types: List[str]) -> bool:
|
|
"""Download all specified data types for all specified generations."""
|
|
success = True
|
|
|
|
for generation in generations:
|
|
print(f"\n{'='*50}")
|
|
print(f"Processing {generation}")
|
|
print(f"{'='*50}")
|
|
|
|
for data_type in data_types:
|
|
try:
|
|
if data_type == 'types':
|
|
if not self.download_type_effectiveness(generation):
|
|
success = False
|
|
elif data_type == 'pokemon':
|
|
if not self.download_pokemon_data(generation):
|
|
success = False
|
|
elif data_type == 'moves':
|
|
if not self.download_moves_data(generation):
|
|
success = False
|
|
else:
|
|
print(f"Unknown data type: {data_type}")
|
|
success = False
|
|
except Exception as e:
|
|
print(f"Error downloading {data_type} for {generation}: {e}")
|
|
success = False
|
|
|
|
return success
|
|
|
|
|
|
def main():
|
|
"""Main entry point for the command-line tool."""
|
|
parser = argparse.ArgumentParser(
|
|
description="Download Pokemon data from PokeAPI",
|
|
formatter_class=argparse.RawDescriptionHelpFormatter,
|
|
epilog="""
|
|
Examples:
|
|
python pokemon_downloader.py --generations generation-i --data-types types
|
|
python pokemon_downloader.py --all-generations --all-data-types
|
|
python pokemon_downloader.py --generations generation-i,generation-ii --data-types pokemon,moves
|
|
"""
|
|
)
|
|
|
|
parser.add_argument(
|
|
'--generations',
|
|
type=lambda x: [g.strip() for g in x.split(',')],
|
|
help='Comma-separated list of generations to download (e.g., generation-i,generation-ii)'
|
|
)
|
|
|
|
parser.add_argument(
|
|
'--all-generations',
|
|
action='store_true',
|
|
help='Download data for all generations'
|
|
)
|
|
|
|
parser.add_argument(
|
|
'--data-types',
|
|
type=lambda x: [dt.strip() for dt in x.split(',')],
|
|
help='Comma-separated list of data types to download (types,pokemon,moves)'
|
|
)
|
|
|
|
parser.add_argument(
|
|
'--all-data-types',
|
|
action='store_true',
|
|
help='Download all data types'
|
|
)
|
|
|
|
parser.add_argument(
|
|
'--output-dir',
|
|
default='data',
|
|
help='Output directory for downloaded data (default: data)'
|
|
)
|
|
|
|
args = parser.parse_args()
|
|
|
|
# Validate arguments
|
|
if not args.all_generations and not args.generations:
|
|
print("Error: Must specify either --generations or --all-generations")
|
|
sys.exit(1)
|
|
|
|
if not args.all_data_types and not args.data_types:
|
|
print("Error: Must specify either --data-types or --all-data-types")
|
|
sys.exit(1)
|
|
|
|
# Initialize downloader
|
|
downloader = PokemonDataDownloader(args.output_dir)
|
|
|
|
# Determine generations to download
|
|
if args.all_generations:
|
|
generations = downloader.get_all_generations()
|
|
else:
|
|
generations = args.generations
|
|
|
|
# Determine data types to download
|
|
if args.all_data_types:
|
|
data_types = ['types', 'pokemon', 'moves']
|
|
else:
|
|
data_types = args.data_types
|
|
|
|
print(f"Downloading {data_types} data for generations: {generations}")
|
|
print(f"Output directory: {args.output_dir}")
|
|
|
|
# Download the data
|
|
success = downloader.download_all_data(generations, data_types)
|
|
|
|
if success:
|
|
print(f"\n{'='*50}")
|
|
print("Download completed successfully!")
|
|
print(f"Data saved to: {args.output_dir}")
|
|
print(f"{'='*50}")
|
|
else:
|
|
print(f"\n{'='*50}")
|
|
print("Download completed with errors. Check the output above for details.")
|
|
print(f"{'='*50}")
|
|
sys.exit(1)
|
|
|
|
|
|
# Script entry point: run the CLI only when executed directly, not on import.
if __name__ == '__main__':
    main()