#!/usr/bin/env python3
"""
Pokemon Data Downloader

This tool downloads generation-specific Pokemon data from the PokeAPI
(https://pokeapi.co/) and organizes it into the proper directory structure
for the Pokemon Battle Engine.

Usage:
    python pokemon_downloader.py --generations 1,2,3 --data-types types,pokemon,moves
    python pokemon_downloader.py --all-generations --all-data-types
    python pokemon_downloader.py --help

Data Structure:
    data/
    ├── types/
    │   ├── generation-i.json
    │   ├── generation-ii.json
    │   └── ...
    ├── pokemon/
    │   ├── generation-i.json
    │   ├── generation-ii.json
    │   └── ...
    └── moves/
        ├── generation-i.json
        ├── generation-ii.json
        └── ...
"""

import argparse
import json
import os
import sys
import time
from pathlib import Path
from typing import Dict, List, Optional, Set

import requests
from tqdm import tqdm


class PokemonDataDownloader:
    """Downloads and processes Pokemon data from PokeAPI."""

    BASE_URL = "https://pokeapi.co/api/v2"

    # Generation mappings (approximate - some Pokemon/moves span generations).
    # Insertion order is the canonical chronological order of generations and
    # is relied on by get_all_generations() and _compare_generations().
    GENERATION_RANGES = {
        "generation-i": (1, 151),       # Kanto
        "generation-ii": (152, 251),    # Johto
        "generation-iii": (252, 386),   # Hoenn
        "generation-iv": (387, 493),    # Sinnoh
        "generation-v": (494, 649),     # Unova
        "generation-vi": (650, 721),    # Kalos
        "generation-vii": (722, 809),   # Alola
        "generation-viii": (810, 905),  # Galar
        "generation-ix": (906, 1025),   # Paldea
    }

    def __init__(self, base_dir: str = "data"):
        """Initialize the downloader with base data directory.

        Creates the types/, pokemon/ and moves/ subdirectories up front so
        every download method can assume its output directory exists.
        """
        self.base_dir = Path(base_dir)
        self.session = requests.Session()
        self.session.headers.update({
            'User-Agent': 'Pokemon-Battle-Engine-Data-Downloader/1.0'
        })

        # Create base directories
        for subdir in ['types', 'pokemon', 'moves']:
            (self.base_dir / subdir).mkdir(parents=True, exist_ok=True)

    def _file_exists_and_valid(self, file_path: Path, min_items: int = 1) -> bool:
        """Check if a file exists and contains at least the minimum number of items.

        Used as a cache check: a JSON file (dict or list) with at least
        ``min_items`` entries is considered already downloaded.
        """
        if not file_path.exists():
            return False

        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                data = json.load(f)

            # Handle different data structures
            if isinstance(data, dict):
                return len(data) >= min_items
            elif isinstance(data, list):
                return len(data) >= min_items
            else:
                return True  # File exists and is valid JSON
        except (json.JSONDecodeError, KeyError, TypeError):
            return False

    def get_json(self, url: str, max_retries: int = 3) -> Optional[Dict]:
        """Fetch JSON data from URL with retry logic.

        Returns the decoded JSON payload, or None after ``max_retries``
        consecutive request failures (errors are printed, not raised).
        """
        for attempt in range(max_retries):
            try:
                response = self.session.get(url, timeout=30)
                response.raise_for_status()
                return response.json()
            except requests.RequestException as e:
                if attempt == max_retries - 1:
                    print(f"Failed to fetch {url}: {e}")
                    return None
                time.sleep(2 ** attempt)  # Exponential backoff
        return None

    def get_all_generations(self) -> List[str]:
        """Get list of all Pokemon generations in chronological order."""
        data = self.get_json(f"{self.BASE_URL}/generation/")
        if not data:
            return list(self.GENERATION_RANGES.keys())

        generations = []
        for gen in data['results']:
            gen_name = gen['name']
            if gen_name in self.GENERATION_RANGES:
                generations.append(gen_name)

        # BUG FIX: a plain lexicographic sorted() ordered roman numerals
        # alphabetically (e.g. generation-ix before generation-v).  Sort by
        # the canonical order declared in GENERATION_RANGES instead.
        canonical_order = list(self.GENERATION_RANGES.keys())
        generations.sort(key=canonical_order.index)
        return generations

    def download_type_effectiveness(self, generation: str) -> bool:
        """Download type effectiveness chart for a specific generation.

        NOTE(review): the data fetched from /type/ is the *current* damage
        chart, not a historical per-generation one — every generation file
        ends up with the same contents.  The generation lookup below only
        validates that the generation exists on the API.
        """
        output_file = self.base_dir / 'types' / f'{generation}.json'

        # Check if file already exists and has enough types (should have at least 15-18 types)
        if self._file_exists_and_valid(output_file, min_items=15):
            print(f"Using existing type effectiveness data for {generation}")
            return True

        print(f"Downloading type effectiveness for {generation}...")

        # Get generation data to find the version groups
        gen_data = self.get_json(f"{self.BASE_URL}/generation/{generation}/")
        if not gen_data:
            print(f"Failed to get generation data for {generation}")
            return False

        # Get the first version group for this generation
        version_groups = gen_data.get('version_groups', [])
        if not version_groups:
            print(f"No version groups found for {generation}")
            return False

        # Use the first version group to get types
        types_data = self.get_json(f"{self.BASE_URL}/type/")
        if not types_data:
            print("Failed to get types list")
            return False

        type_effectiveness = {}

        # For each type, get its damage relations
        for type_info in tqdm(types_data['results'], desc="Processing types"):
            type_name = type_info['name']

            # Skip special types that don't have damage relations
            if type_name in ['unknown', 'shadow']:
                continue

            type_data = self.get_json(f"{self.BASE_URL}/type/{type_name}/")
            if not type_data:
                print(f"Failed to get data for type {type_name}")
                continue

            # Extract damage relations
            damage_relations = type_data.get('damage_relations', {})
            type_effectiveness[type_name] = {
                'double_damage_from': [t['name'] for t in damage_relations.get('double_damage_from', [])],
                'double_damage_to': [t['name'] for t in damage_relations.get('double_damage_to', [])],
                'half_damage_from': [t['name'] for t in damage_relations.get('half_damage_from', [])],
                'half_damage_to': [t['name'] for t in damage_relations.get('half_damage_to', [])],
                'no_damage_from': [t['name'] for t in damage_relations.get('no_damage_from', [])],
                'no_damage_to': [t['name'] for t in damage_relations.get('no_damage_to', [])],
            }

        # Save the data
        with open(output_file, 'w', encoding='utf-8') as f:
            json.dump(type_effectiveness, f, indent=2, ensure_ascii=False)

        print(f"Saved type effectiveness data to {output_file}")
        return True

    def download_pokemon_data(self, generation: str) -> bool:
        """Download Pokemon data for a specific generation.

        Fetches every Pokemon in the generation's national-dex ID range,
        plus species metadata (legendary/mythical flags), and writes the
        list to pokemon/<generation>.json.
        """
        output_file = self.base_dir / 'pokemon' / f'{generation}.json'

        # Check if file already exists and has expected number of Pokemon
        if generation in self.GENERATION_RANGES:
            min_id, max_id = self.GENERATION_RANGES[generation]
            expected_pokemon = max_id - min_id + 1
            # int() fix: min_items is declared int; allow for ~20% missing Pokemon
            if self._file_exists_and_valid(output_file, min_items=int(expected_pokemon * 0.8)):
                print(f"Using existing Pokemon data for {generation}")
                return True

        print(f"Downloading Pokemon data for {generation}...")

        if generation not in self.GENERATION_RANGES:
            print(f"Unknown generation: {generation}")
            return False

        min_id, max_id = self.GENERATION_RANGES[generation]
        pokemon_list = []

        # Download Pokemon in batches
        for pokemon_id in tqdm(range(min_id, max_id + 1), desc=f"Downloading {generation} Pokemon"):
            pokemon_data = self.get_json(f"{self.BASE_URL}/pokemon/{pokemon_id}/")
            if not pokemon_data:
                print(f"Failed to get Pokemon {pokemon_id}")
                continue

            # Extract relevant Pokemon data
            pokemon_info = {
                'id': pokemon_data['id'],
                'name': pokemon_data['name'],
                'height': pokemon_data['height'],
                'weight': pokemon_data['weight'],
                'base_experience': pokemon_data.get('base_experience', 0),
                'types': [t['type']['name'] for t in pokemon_data['types']],
                'stats': {
                    stat['stat']['name']: stat['base_stat']
                    for stat in pokemon_data['stats']
                },
                'abilities': [
                    {
                        'name': ability['ability']['name'],
                        'is_hidden': ability['is_hidden'],
                        'slot': ability['slot']
                    }
                    for ability in pokemon_data['abilities']
                ]
            }

            # Get species data for additional information (best-effort: skipped on failure)
            species_data = self.get_json(pokemon_data['species']['url'])
            if species_data:
                pokemon_info['species'] = {
                    'name': species_data['name'],
                    'generation': species_data['generation']['name'],
                    'is_legendary': species_data['is_legendary'],
                    'is_mythical': species_data['is_mythical'],
                    'is_baby': species_data.get('is_baby', False),
                }

            pokemon_list.append(pokemon_info)

        # Save the data
        with open(output_file, 'w', encoding='utf-8') as f:
            json.dump(pokemon_list, f, indent=2, ensure_ascii=False)

        print(f"Saved {len(pokemon_list)} Pokemon to {output_file}")
        return True

    def download_all_moves_data(self) -> bool:
        """Download all moves data and save to all_moves.json.

        This is a shared cache consumed by download_moves_data(), which
        filters it per generation.
        """
        all_moves_file = self.base_dir / 'all_moves.json'

        # Check if all moves file already exists and has reasonable size
        if all_moves_file.exists():
            try:
                with open(all_moves_file, 'r', encoding='utf-8') as f:
                    existing_data = json.load(f)
                if len(existing_data) > 800:  # Should have at least 800+ moves
                    print(f"Using existing all_moves.json with {len(existing_data)} moves")
                    return True
                else:
                    print(f"Existing all_moves.json only has {len(existing_data)} moves, re-downloading...")
            except (json.JSONDecodeError, KeyError):
                print("Existing all_moves.json is corrupted, re-downloading...")

        print("Downloading all moves data...")

        # Get all moves
        moves_data = self.get_json(f"{self.BASE_URL}/move/?limit=2000")
        if not moves_data:
            print("Failed to get moves list")
            return False

        all_moves = {}

        # Download move details
        for move_ref in tqdm(moves_data['results'], desc="Downloading all moves"):
            move_data = self.get_json(move_ref['url'])
            if not move_data:
                continue

            # FIX: the built record previously shadowed the loop variable
            # (both named move_info); use a distinct name.
            move_record = {
                'id': move_data['id'],
                'name': move_data['name'],
                'generation': move_data['generation']['name'],
                'power': move_data['power'],
                'pp': move_data['pp'],
                'accuracy': move_data['accuracy'],
                'priority': move_data['priority'],
                'damage_class': move_data['damage_class']['name'],
                'type': move_data['type']['name'],
                'target': move_data['target']['name'],
                'effect_chance': move_data.get('effect_chance'),
                'effect': move_data['effect_entries'][0]['effect'] if move_data['effect_entries'] else None,
                'short_effect': move_data['effect_entries'][0]['short_effect'] if move_data['effect_entries'] else None,
                'meta': move_data.get('meta', {}),
            }
            all_moves[move_data['id']] = move_record

        # Save all moves data
        with open(all_moves_file, 'w', encoding='utf-8') as f:
            json.dump(all_moves, f, indent=2, ensure_ascii=False)

        print(f"Saved {len(all_moves)} moves to {all_moves_file}")
        return True

    def download_moves_data(self, generation: str) -> bool:
        """Download moves data for a specific generation.

        Ensures all_moves.json exists, then writes the subset of moves
        introduced in ``generation`` or earlier to moves/<generation>.json.
        """
        output_file = self.base_dir / 'moves' / f'{generation}.json'

        # Check if file already exists and has a reasonable number of moves
        # Generation I should have ~165 moves, later generations should have more
        min_moves = 100 if generation == "generation-i" else 200
        if self._file_exists_and_valid(output_file, min_items=min_moves):
            print(f"Using existing moves data for {generation}")
            return True

        print(f"Downloading moves data for {generation}...")

        # First ensure we have all moves data
        if not self.download_all_moves_data():
            return False

        # Load all moves data
        all_moves_file = self.base_dir / 'all_moves.json'
        try:
            with open(all_moves_file, 'r', encoding='utf-8') as f:
                all_moves = json.load(f)
        except (FileNotFoundError, json.JSONDecodeError):
            print("Failed to load all_moves.json")
            return False

        moves_list = []

        # Filter moves by generation (keys are unused, so iterate values only)
        for move_data in all_moves.values():
            # Check if this move was introduced in the target generation or earlier
            move_generation = move_data['generation']
            if self._compare_generations(move_generation, generation) > 0:
                continue  # Move is from a later generation

            moves_list.append(move_data)

        # Save the filtered data
        with open(output_file, 'w', encoding='utf-8') as f:
            json.dump(moves_list, f, indent=2, ensure_ascii=False)

        print(f"Saved {len(moves_list)} moves to {output_file}")
        return True

    def _compare_generations(self, gen1: str, gen2: str) -> int:
        """Compare two generations.

        Returns -1 if gen1 < gen2, 0 if equal, 1 if gen1 > gen2.
        Unknown generation names compare as equal (0), so their moves are
        conservatively kept by download_moves_data().
        """
        generations = list(self.GENERATION_RANGES.keys())
        try:
            idx1 = generations.index(gen1)
            idx2 = generations.index(gen2)
            return (idx1 > idx2) - (idx1 < idx2)
        except ValueError:
            return 0

    def download_all_data(self, generations: List[str], data_types: List[str]) -> bool:
        """Download all specified data types for all specified generations.

        Returns True only if every (generation, data_type) combination
        succeeded; failures are printed and collected rather than aborting
        the run.
        """
        success = True

        for generation in generations:
            print(f"\n{'='*50}")
            print(f"Processing {generation}")
            print(f"{'='*50}")

            for data_type in data_types:
                try:
                    if data_type == 'types':
                        if not self.download_type_effectiveness(generation):
                            success = False
                    elif data_type == 'pokemon':
                        if not self.download_pokemon_data(generation):
                            success = False
                    elif data_type == 'moves':
                        if not self.download_moves_data(generation):
                            success = False
                    else:
                        print(f"Unknown data type: {data_type}")
                        success = False
                except Exception as e:
                    print(f"Error downloading {data_type} for {generation}: {e}")
                    success = False

        return success


def main():
    """Main entry point for the command-line tool."""
    parser = argparse.ArgumentParser(
        description="Download Pokemon data from PokeAPI",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  python pokemon_downloader.py --generations generation-i --data-types types
  python pokemon_downloader.py --all-generations --all-data-types
  python pokemon_downloader.py --generations generation-i,generation-ii --data-types pokemon,moves
        """
    )

    parser.add_argument(
        '--generations',
        type=lambda x: [g.strip() for g in x.split(',')],
        help='Comma-separated list of generations to download (e.g., generation-i,generation-ii)'
    )

    parser.add_argument(
        '--all-generations',
        action='store_true',
        help='Download data for all generations'
    )

    parser.add_argument(
        '--data-types',
        type=lambda x: [dt.strip() for dt in x.split(',')],
        help='Comma-separated list of data types to download (types,pokemon,moves)'
    )

    parser.add_argument(
        '--all-data-types',
        action='store_true',
        help='Download all data types'
    )

    parser.add_argument(
        '--output-dir',
        default='data',
        help='Output directory for downloaded data (default: data)'
    )

    args = parser.parse_args()

    # Validate arguments
    if not args.all_generations and not args.generations:
        print("Error: Must specify either --generations or --all-generations")
        sys.exit(1)

    if not args.all_data_types and not args.data_types:
        print("Error: Must specify either --data-types or --all-data-types")
        sys.exit(1)

    # Initialize downloader
    downloader = PokemonDataDownloader(args.output_dir)

    # Determine generations to download
    if args.all_generations:
        generations = downloader.get_all_generations()
    else:
        generations = args.generations

    # Determine data types to download
    if args.all_data_types:
        data_types = ['types', 'pokemon', 'moves']
    else:
        data_types = args.data_types

    print(f"Downloading {data_types} data for generations: {generations}")
    print(f"Output directory: {args.output_dir}")

    # Download the data
    success = downloader.download_all_data(generations, data_types)

    if success:
        print(f"\n{'='*50}")
        print("Download completed successfully!")
        print(f"Data saved to: {args.output_dir}")
        print(f"{'='*50}")
    else:
        print(f"\n{'='*50}")
        print("Download completed with errors. Check the output above for details.")
        print(f"{'='*50}")
        sys.exit(1)


if __name__ == '__main__':
    main()