From 2a3ba4ea4c40de6721fa8a838e96db6ad80267b5 Mon Sep 17 00:00:00 2001 From: Connor De Meyer Date: Tue, 30 Dec 2025 12:16:52 +0900 Subject: [PATCH] Phase 1 & 2 --- .gitignore | 35 ++- .gitmodules | 8 + Cargo.toml | 20 ++ DESIGN.md | 192 ++++++++++++ Makefile | 80 +++++ NOTES.md | 4 + ROADMAP.md | 497 ++++++++++++++++++++++++++++++ cursebreaker-parser/Cargo.toml | 22 ++ cursebreaker-parser/src/main.rs | 155 ++++++++++ data/tests/unity-csharp-reference | 1 + data/tests/unity-sampleproject | 1 + scripts/setup-test-project.bat | 29 ++ scripts/setup-test-project.sh | 29 ++ unity-parser/Cargo.toml | 23 ++ unity-parser/src/asset.rs | 424 +++++++++++++++++++++++++ unity-parser/src/context.rs | 363 ++++++++++++++++++++++ unity-parser/src/error.rs | 101 ++++++ unity-parser/src/lib.rs | 68 ++++ unity-parser/src/meta.rs | 333 ++++++++++++++++++++ unity-parser/src/types.rs | 445 ++++++++++++++++++++++++++ unity-parser/src/world.rs | 258 ++++++++++++++++ 21 files changed, 3073 insertions(+), 15 deletions(-) create mode 100644 .gitmodules create mode 100644 Cargo.toml create mode 100644 DESIGN.md create mode 100644 Makefile create mode 100644 NOTES.md create mode 100644 ROADMAP.md create mode 100644 cursebreaker-parser/Cargo.toml create mode 100644 cursebreaker-parser/src/main.rs create mode 160000 data/tests/unity-csharp-reference create mode 160000 data/tests/unity-sampleproject create mode 100644 scripts/setup-test-project.bat create mode 100644 scripts/setup-test-project.sh create mode 100644 unity-parser/Cargo.toml create mode 100644 unity-parser/src/asset.rs create mode 100644 unity-parser/src/context.rs create mode 100644 unity-parser/src/error.rs create mode 100644 unity-parser/src/lib.rs create mode 100644 unity-parser/src/meta.rs create mode 100644 unity-parser/src/types.rs create mode 100644 unity-parser/src/world.rs diff --git a/.gitignore b/.gitignore index 0b188bc..6157323 100644 --- a/.gitignore +++ b/.gitignore @@ -1,18 +1,23 @@ -# ---> Rust -# Generated by Cargo 
-# will have compiled files and executables -debug/ -target/ - -# These are backup files generated by rustfmt +# Rust +/target/ **/*.rs.bk - -# MSVC Windows builds of rustc generate these, which store debugging information *.pdb +Cargo.lock -# RustRover -# JetBrains specific template is maintained in a separate JetBrains.gitignore that can -# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore -# and can be added to the global gitignore or merged into this file. For a more nuclear -# option (not recommended) you can uncomment the following to ignore the entire idea folder. -#.idea/ +# IDE +.vscode/ +.idea/ +*.swp +*.swo +*~ + +# Environment +.env + +# OS +.DS_Store +Thumbs.db + +# Build artifacts +/unity-parser/target/ +/cursebreaker-parser/target/ diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..aff3931 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,8 @@ +[submodule "data/tests/unity-sampleproject"] + path = data/tests/unity-sampleproject + url = https://github.com/heroiclabs/unity-sampleproject + branch = master +[submodule "data/tests/unity-csharp-reference"] + path = data/tests/unity-csharp-reference + url = https://github.com/Unity-Technologies/UnityCsReference + branch = 2018.1 diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..4e487a8 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,20 @@ +[workspace] +members = [ + "unity-parser", + "cursebreaker-parser" +] +resolver = "2" + +[workspace.dependencies] +# Common dependencies can be defined here for sharing across crates +serde = { version = "1.0", features = ["derive"] } +serde_yaml = "0.9" +serde_json = "1.0" +anyhow = "1.0" +tokio = { version = "1.0", features = ["full"] } +glam = "0.24" +sparsey = "0.11" +walkdir = "2.4" +rayon = "1.8" +thiserror = "1.0" +dotenvy = "0.15" diff --git a/DESIGN.md b/DESIGN.md new file mode 100644 index 0000000..f0444db --- /dev/null +++ b/DESIGN.md @@ -0,0 +1,192 @@ +# Unity Parser – Design Document + +## 
Overview + +**Unity Parser** is a Rust library for parsing local Unity projects (scenes and prefabs) from their YAML representation (`.unity` and `.prefab` files) and loading the resulting data into an ECS world. + +The primary goal is to enable users to: +- Selectively extract only the data they care about (minimal memory footprint). +- Mirror Unity MonoBehaviour types in Rust with minimal boilerplate. +- Query the fully instantiated scene (including all nested prefabs) using ECS queries. + +Use cases include: +- Modding tools +- Static analysis +- Database generation +- Asset inspection / reporting +- Custom exporters + +The library is **offline-only** – it works exclusively on exported Unity project files (YAML + assets). No runtime or in-engine integration is planned. + +## Core Principles + +- **Minimal memory usage**: Only parse and store components explicitly requested by the user. +- **Fast setup**: Users declare desired types via a single procedural macro. +- **Full prefab instantiation**: All prefabs (including nested/variant) are fully expanded into the scene. +- **Simple querying**: Users work directly with the ECS world (Sparsey) or optional helper methods. + +## Architecture + +### ECS Backend +- **Sparsey** is used as the ECS implementation. + - Rationale: Lightweight, excellent insertion performance, no archetype overhead. + - Query performance trade-off is acceptable because queries are infrequent (typically once or a few times per tool run, not per-frame like in games). +- Each loaded scene gets its own `World` (Sparsey terminology). +- The ECS world is **exposed directly** to users for maximum flexibility. +- Optional ergonomic helpers may be added later (e.g., `scene.foreach::<(GameObject, Transform, Interactable)>(|...|)`). + +### Data Flow +1. User configures which component types to parse (via macro). +2. Library scans project for relevant `.unity`, `.prefab`, and `.meta` files. +3. Scenes and prefabs are stream-parsed (YAML). +4. 
Only declared components are deserialized and inserted. +5. Prefab instances are recursively instantiated (new fileID mapping per nesting level). +6. After all objects are created, world transforms are computed in a post-process pass. +7. Resulting `World` is returned (or cached). + +## User Configuration + +Users declare all desired types with a single procedural macro: + +```rust +#[unity_parser( + // Built-in Unity components (non-script) + unity_types(Transform, MeshFilter, MeshRenderer, Collider /* ... */), + + // Custom MonoBehaviour components + custom_types(Interactable, Harvestable, LootContainer, EnemyAI), + + // Asset types beyond scenes/prefabs (future extension) + asset_types(/* Material, Texture2D */) +)] +struct MyProjectConfig; +``` + +### Rules +- **unity_types**: Built-in Unity components (no associated script). +- **custom_types**: User-defined structs that mirror MonoBehaviour scripts. + - Struct name **must exactly match** the C# class name. + - The parser will automatically locate the corresponding `.cs` file to extract its GUID for matching YAML entries. +- Users **must explicitly list** every component they want. Nothing is parsed by default. +- Examples and common sets will be provided in documentation. + +## Component Definition + +Components are plain Rust structs mirroring Unity’s serialized fields. + +```rust +#[derive(Component)] +struct Transform { + local_position: Vec3, + local_rotation: Quat, + local_scale: Vec3, + world_matrix: Mat4, // Computed in post-process + parent: Option<Entity>, + children: Vec<Entity>, +} + +#[derive(Component)] +struct Interactable { + interaction_prompt: String, + radius: f32, +} +``` + +- Users can implement custom parsing logic if needed. +- Derive macros will offer automatic field parsing for common cases. 
+ +### Special Cases +- **GameObject**: Not a true component, but stored as a component containing: + - `name: String` + - `layer: u32` + - `active: bool` + +## Prefab Instantiation + +- Full support for **nested prefabs** (modern Unity prefab workflow). +- Strategy: + - Prefabs are parsed exactly like scenes. + - When a `PrefabInstance` is encountered, the referenced prefab is loaded recursively. + - A new `HashMap` mapping is created for each nesting level. + - Overrides are applied only to property values (via `propertyPath`). + - Current scope: **only property overrides** are applied. + - TODO: Support added/removed components, reordered children, removed GameObjects. + +## Asset Handling + +All parsable assets implement a trait: + +```rust +trait AssetParser { + fn extensions() -> &'static [&'static str]; + fn parse(yaml: &YamlNode, context: &ParseContext) -> Result; +} +``` + +- Built-in: `.unity` (scenes), `.prefab` (prefabs). +- `.meta` files are parsed to build GUID ↔ path mappings. +- Future extension possible for other YAML assets (e.g., ScriptableObjects). + +## Selective Parsing & Memory + +- Only components listed in the config macro are parsed. +- During YAML streaming, unknown component types (`!u!XXX`) are **completely skipped** – no allocation, no temporary structures. +- Goal: Load even very large scenes (hundreds of thousands of objects) into moderate RAM when only a subset of components is requested. + +## Transform Hierarchy + +- Local transforms are parsed immediately. +- Parent/child relationships are recorded. +- **World matrices and full hierarchy** are computed in a single post-process pass after all entities exist. + +## Caching + +- Optional caching to SQLite. +- **Single database file** containing all scenes. 
+- Tables: + - `scenes(scene_path PRIMARY KEY, hash, timestamp)` + - `entities(entity_id, scene_path, gameobject_name, layer, active)` + - One table per component type (e.g., `transform`, `interactable`) +- Cache contains **only final ECS data** (post-instantiation, post-transform pass). +- No sophisticated invalidation: user controls caching via flag/option. + - `parse(..., use_cache: bool)` + - CLI: `--cache` / `--no-cache` +- Cache is regenerated completely when caching is enabled and either the source files are newer or the cache is missing. + +## API Sketch + +```rust +let world = unity_parser::parse::<MyProjectConfig>( + project_root: "/path/to/unity/project", + scenes: vec!["Assets/Scenes/Level1.unity"], + use_cache: true, + max_parallel: Some(4), +)?; +``` + +- `ParserBuilder` may be added later for more configuration. +- Parallel parsing of independent scenes/prefabs is supported (rayon, limited to 4 jobs by default to control memory). + +## Error Handling + +- Malformed YAML or missing references: log warning/error, continue parsing. +- Missing expected component fields: log, insert default/None where possible. +- Critical failures (e.g., corrupted scene file): return `Err`. + +## Future Considerations / TODOs + +- ParserBuilder API +- Automatic derive for common component parsing +- Support for added/removed components in prefab overrides +- Component serialization versioning +- More asset types (Materials, Animators, etc.) +- Binary cache format for faster loading +- Helper query methods on top of raw Sparsey API + +## Testing + +To test this repo, a second project in the same repository loads the "Cursebreaker" game from a path that is configured in the `.env` file. + +## Summary + +Unity Parser aims to be the fastest, most memory-efficient way to extract structured data from Unity YAML projects in Rust, with a focus on user-defined components and full prefab instantiation. 
By leveraging Sparsey and aggressive selective parsing, it enables tools that process massive Unity scenes on ordinary hardware. \ No newline at end of file diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..8c044c1 --- /dev/null +++ b/Makefile @@ -0,0 +1,80 @@ +# Unity Parser Build and Test Automation +# Cross-platform Makefile + +.PHONY: default setup-test build test test-cursebreaker test-unit run run-cursebreaker clean clean-all ci dev help + +# Default target +default: test + +# Setup test projects (git submodules) +setup-test: + @echo "Setting up test Unity projects..." + call scripts\setup-test-project.bat + +# Build in release mode +build: + cargo build --release + +# Run tests with test projects (integration tests) +test: setup-test + @echo "Running tests with test projects..." + cargo test --lib -- --nocapture + +# Run tests with Cursebreaker project (large) +test-cursebreaker: + @echo "Running tests with Cursebreaker project..." + @TEST_PROJECT_PATH="c:/Repos/CBAssets" cargo test --lib -- --nocapture + +# Run unit tests only (no integration tests) +test-unit: + cargo test --lib -- --nocapture + +# Run CLI tool with first available test project +run: build + .\target\release\cursebreaker-parser.exe scan --project "data/tests/unity-sampleproject/PiratePanic" + +# Run CLI tool with Cursebreaker project +run-cursebreaker: build + ./target/release/cursebreaker-parser scan --project "c:/Repos/CBAssets" + +# Clean build artifacts +clean: + cargo clean + +# Clean everything including test projects +clean-all: + cargo clean + git submodule deinit -f --all + @if [ -d "data/tests" ]; then \ + if command -v rmdir >/dev/null 2>&1 && [ "$$(uname -s)" = "Windows_NT" ]; then \ + rmdir /s /q data/tests 2>/dev/null || true; \ + else \ + rm -rf data/tests; \ + fi; \ + fi + +# Full CI pipeline +ci: clean setup-test build test + +# Development workflow +dev: setup-test + cargo build + cargo test --lib + +# Show available targets +help: + @echo "Unity Parser 
Build and Test Automation" + @echo "" + @echo "Available targets:" + @echo " setup-test - Initialize git submodules for test projects" + @echo " build - Build in release mode" + @echo " test - Run all tests (with test projects)" + @echo " test-unit - Run unit tests only (no integration)" + @echo " test-cursebreaker - Run tests with Cursebreaker project" + @echo " run - Run CLI with test project" + @echo " run-cursebreaker - Run CLI with Cursebreaker project" + @echo " clean - Clean build artifacts" + @echo " clean-all - Clean everything including test projects" + @echo " ci - Full CI pipeline (clean + setup + build + test)" + @echo " dev - Development workflow (setup + build + test)" + @echo " help - Show this help message" diff --git a/NOTES.md b/NOTES.md new file mode 100644 index 0000000..d91a668 --- /dev/null +++ b/NOTES.md @@ -0,0 +1,4 @@ +# Unity Parser - Development Notes + +Please put brief notes/memories in here. + diff --git a/ROADMAP.md b/ROADMAP.md new file mode 100644 index 0000000..50c9033 --- /dev/null +++ b/ROADMAP.md @@ -0,0 +1,497 @@ +# Unity Parser - Implementation Roadmap + +## Current Status + +**Repository Structure:** +- `unity-parser/` - Main library crate (empty, needs implementation) +- `cursebreaker-parser/` - CLI binary for testing (empty, needs implementation) +- `unity-project-derive/` - Procedural macro crate (doesn't exist yet, needs creation) + +**Dependencies Already Configured:** +- Sparsey (not yet added to Cargo.toml) +- serde, serde_yaml, serde_json +- anyhow, glam +- tokio (for async operations) +- clap, rusqlite, sqlx (for CLI tool) + +--- + +## Phase 1: Core Foundation ✅ **COMPLETE** + +**Goal:** Establish basic project structure and core types + +### Tasks +- [x] Create `unity-parser/src/lib.rs` with module structure + - [x] `mod error;` - Error types + - [x] `mod types;` - Core types (FileID, GUID, etc.) 
+ - [x] `mod meta;` - .meta file parser + - [x] `mod context;` - ParseContext + - [x] `mod asset;` - AssetParser trait + - [x] `mod world;` - World builder + +- [x] Add Sparsey ECS dependency to `unity-parser/Cargo.toml` + ```toml + sparsey = "0.11" + ``` + +- [ ] Create `unity-project-derive/` procedural macro crate (deferred to Phase 7) + - [ ] Initialize with `cargo new --lib unity-project-derive` + - [ ] Add to workspace members in root `Cargo.toml` + - [ ] Add proc-macro dependencies (syn, quote, proc-macro2) + +- [x] Define core types in `unity-parser/src/types.rs` + - [x] `FileID` - Unity's local file identifier + - [x] `GUID` - Unity's global unique identifier + - [x] `Reference` - Represents `{fileID: X, guid: Y}` references + - [x] `PropertyPath` - Represents prefab override paths + +- [x] Implement error types in `unity-parser/src/error.rs` + - [x] `ParseError` enum with variants: + - `InvalidYaml` + - `MissingFile` + - `InvalidReference` + - `ComponentNotFound` + - [x] Implement `std::error::Error` and `Display` + +- [x] Create .meta file parser in `unity-parser/src/meta.rs` + - [x] Parse GUID from .meta files + - [x] Build GUID → file path mapping + - [x] Scan project directory for all .meta files + +- [x] Implement `ParseContext` in `unity-parser/src/context.rs` + - [x] Store GUID mappings + - [x] Store FileID → Entity mappings + - [x] Cache loaded prefabs + - [x] Project root path + +**Completion Criteria:** ✅ Can scan a Unity project and extract all GUIDs from .meta files. 
+ +**See `PHASE1_COMPLETE.md` for detailed summary.** + +--- + +## Phase 2: YAML Parsing 🔜 + +**Goal:** Parse Unity's YAML format efficiently with selective component loading + +### Tasks +- [ ] Create `unity-parser/src/yaml/mod.rs` module + - [ ] `mod document;` - Unity document structure + - [ ] `mod stream;` - Streaming YAML parser + - [ ] `mod property_path;` - Property path parser + +- [ ] Implement Unity document parser (`yaml/document.rs`) + - [ ] Parse `--- !u!XXX &fileID` headers + - [ ] Extract component type + - [ ] Extract fileID anchor + - [ ] Parse YAML body into generic structure + +- [ ] Create selective streaming parser (`yaml/stream.rs`) + - [ ] Skip unknown component types without allocation + - [ ] Only deserialize registered component types + - [ ] Handle Unity's special YAML syntax quirks + +- [ ] Implement reference parser + - [ ] Parse `{fileID: X}` (local references) + - [ ] Parse `{fileID: X, guid: Y}` (external references) + - [ ] Resolve references to Entity IDs + +- [ ] Implement property path parser (`yaml/property_path.rs`) + - [ ] Parse paths like `m_LocalPosition.x` + - [ ] Support array indices: `m_Children.Array.data[0]` + - [ ] Apply overrides to parsed components + +**Completion Criteria:** Can parse a simple .unity scene file and extract GameObject names. 
+ +--- + +## Phase 3: Component System 🔜 + +**Goal:** Define component traits and implement built-in Unity components + +### Tasks +- [ ] Create `unity-parser/src/components/mod.rs` + - [ ] `mod builtin;` - Built-in Unity components + - [ ] `mod custom;` - Custom component registry + - [ ] `mod traits;` - Component traits + +- [ ] Define `AssetParser` trait (`asset.rs`) + ```rust + trait AssetParser { + fn extensions() -> &'static [&'static str]; + fn parse(yaml: &YamlNode, context: &ParseContext) -> Result; + } + ``` + +- [ ] Implement `GameObject` pseudo-component + - [ ] `name: String` + - [ ] `layer: u32` + - [ ] `active: bool` + - [ ] `tag: String` + +- [ ] Implement `Transform` component (`components/builtin/transform.rs`) + - [ ] `local_position: Vec3` + - [ ] `local_rotation: Quat` + - [ ] `local_scale: Vec3` + - [ ] `world_matrix: Mat4` (computed later) + - [ ] `parent: Option` + - [ ] `children: Vec` + +- [ ] Implement other common Unity components + - [ ] `MeshFilter` (mesh reference) + - [ ] `MeshRenderer` (materials) + - [ ] `Collider` types (Box, Sphere, Capsule, Mesh) + - [ ] `Rigidbody` + - [ ] `Camera` + - [ ] `Light` + +- [ ] Create component registry system + - [ ] Map Unity type tags (`!u!1`, `!u!4`, etc.) to Rust types + - [ ] Map MonoBehaviour script GUIDs to custom types + - [ ] Provide lookup functions + +**Completion Criteria:** Can parse a scene with GameObjects and Transforms into structs. 
+ +--- + +## Phase 4: ECS Integration 🔜 + +**Goal:** Load parsed data into a Sparsey ECS world + +### Tasks +- [ ] Create `unity-parser/src/world/mod.rs` + - [ ] `mod builder;` - World builder + - [ ] `mod entity_map;` - FileID → Entity mapping + +- [ ] Implement `WorldBuilder` + - [ ] Create Sparsey `World` + - [ ] Track FileID → Entity mappings + - [ ] Insert components into entities + - [ ] Handle component dependencies + +- [ ] Create scene loading pipeline + - [ ] Parse all GameObjects first (create entities) + - [ ] Parse and attach components in second pass + - [ ] Resolve all references + - [ ] Return completed `World` + +- [ ] Implement basic scene loader + ```rust + pub fn load_scene(path: &Path, context: &ParseContext) -> Result<World> + ``` + +- [ ] Create query helper utilities + - [ ] Wrapper around Sparsey queries + - [ ] Type-safe component access + - [ ] Optional ergonomic helpers + +**Completion Criteria:** Can load a simple scene into Sparsey and query entities with specific components. 
+ +--- + +## Phase 5: Prefab System 🔜 + +**Goal:** Support nested prefab instantiation with overrides + +### Tasks +- [ ] Create `unity-parser/src/prefab/mod.rs` + - [ ] `mod instance;` - Prefab instance handling + - [ ] `mod overrides;` - Property override application + - [ ] `mod nesting;` - Nested prefab support + +- [ ] Implement prefab loading + - [ ] Load `.prefab` files like scenes + - [ ] Cache loaded prefabs in `ParseContext` + - [ ] Prevent circular references + +- [ ] Create `PrefabInstance` component parser + - [ ] Extract source prefab GUID + - [ ] Extract modification list + - [ ] Parse property overrides + +- [ ] Implement prefab instantiation + - [ ] Clone prefab entities into current world + - [ ] Create new FileID mapping scope for each instance + - [ ] Recursively handle nested prefabs + - [ ] Maintain parent-child relationships + +- [ ] Apply property overrides + - [ ] Parse property paths + - [ ] Navigate to target component field + - [ ] Apply override value + - [ ] Support all field types (scalars, arrays, references) + +- [ ] Handle prefab variants + - [ ] Load base prefab first + - [ ] Apply variant overrides on top + +**Completion Criteria:** Can load a scene with nested prefab instances and all overrides applied correctly. 
+ +--- + +## Phase 6: Transform Hierarchy 🔜 + +**Goal:** Compute world-space transforms from local transforms + +### Tasks +- [ ] Create `unity-parser/src/transform/mod.rs` + - [ ] `mod hierarchy;` - Parent-child traversal + - [ ] `mod compute;` - World matrix computation + +- [ ] Implement hierarchy builder + - [ ] Build parent → children map + - [ ] Detect root transforms (no parent) + - [ ] Validate hierarchy (no cycles) + +- [ ] Implement world transform computation + - [ ] Traverse hierarchy depth-first + - [ ] Compute world matrix: `parent.world * local` + - [ ] Handle scale, rotation, position correctly + - [ ] Cache results in Transform components + +- [ ] Create post-process pass + - [ ] Run after all entities and prefabs loaded + - [ ] Single pass over all transforms + - [ ] Update all Transform.world_matrix fields + +**Completion Criteria:** World-space positions are correctly computed for nested GameObjects and prefab instances. + +--- + +## Phase 7: Procedural Macros 🔜 + +**Goal:** Implement ergonomic macro API for configuration + +### Tasks +- [ ] Set up `unity-project-derive/src/lib.rs` + - [ ] Add proc-macro crate type + - [ ] Import syn, quote dependencies + +- [ ] Implement `#[unity_parser(...)]` configuration macro + - [ ] Parse `unity_types(...)` list + - [ ] Parse `custom_types(...)` list + - [ ] Parse optional `asset_types(...)` list + - [ ] Generate type registry + - [ ] Generate parser configuration struct + +- [ ] Implement `#[derive(Component)]` macro + - [ ] Generate field parsing code + - [ ] Handle common field types automatically + - [ ] Allow custom parsing attributes + - [ ] Generate `FromYaml` trait impl + +- [ ] Create script GUID extraction tool + - [ ] Scan project for `.cs` files + - [ ] Parse file to find class name + - [ ] Read corresponding `.meta` file for GUID + - [ ] Build MonoBehaviour GUID → Rust type map + +- [ ] Generate type registration at compile time + - [ ] Map Unity tags to built-in types + - [ ] Map script GUIDs 
to custom types + - [ ] Create static registry + +**Completion Criteria:** User can declare desired types with single macro, no manual registration needed. + +--- + +## Phase 8: Caching Layer 🔜 + +**Goal:** Add optional SQLite caching for faster subsequent loads + +### Tasks +- [ ] Create `unity-parser/src/cache/mod.rs` + - [ ] `mod schema;` - Dynamic schema generation + - [ ] `mod storage;` - SQLite operations + - [ ] `mod invalidation;` - Cache validation + +- [ ] Implement schema generation + - [ ] Create `scenes` table + - [ ] Generate table per component type + - [ ] Use reflection/macro data for columns + - [ ] Handle relationships (foreign keys) + +- [ ] Implement cache storage + - [ ] Serialize ECS world to SQLite + - [ ] Store entity IDs and components + - [ ] Store metadata (timestamps, hashes) + +- [ ] Implement cache loading + - [ ] Deserialize from SQLite to World + - [ ] Reconstruct entities and components + - [ ] Restore references + +- [ ] Add cache invalidation + - [ ] Hash scene and prefab files + - [ ] Compare timestamps + - [ ] Invalidate on source changes + - [ ] User-controlled cache refresh + +- [ ] Add cache configuration + - [ ] `use_cache: bool` parameter + - [ ] Cache location configuration + - [ ] Per-scene caching + +**Completion Criteria:** Second load of same scene is 10x+ faster when cached. 
+ +--- + +## Phase 9: CLI Tool 🔜 + +**Goal:** Create functional `cursebreaker-parser` binary + +### Tasks +- [ ] Implement `cursebreaker-parser/src/main.rs` + - [ ] Command-line argument parsing (clap) + - [ ] Config file support (`.env` for game path) + +- [ ] Add CLI commands + - [ ] `parse ` - Parse and display scene info + - [ ] `export ` - Export to JSON/SQL + - [ ] `list` - List all scenes in project + - [ ] `cache clear` - Clear cache + +- [ ] Implement progress reporting + - [ ] Progress bars for large scenes + - [ ] File count and size statistics + - [ ] Error/warning summary + +- [ ] Add export formats + - [ ] JSON (full scene dump) + - [ ] SQL (INSERT statements) + - [ ] CSV (per-component type) + - [ ] Custom format (user-defined) + +- [ ] Configure Cursebreaker game path + - [ ] Read from `.env` file + - [ ] Example `.env.example` + - [ ] Path validation + +- [ ] Create example configuration + - [ ] Define Cursebreaker-specific components + - [ ] Use `#[unity_parser(...)]` macro + - [ ] Document component types + +**Completion Criteria:** Can successfully parse Cursebreaker game and export data. 
+ +--- + +## Phase 10: Testing & Documentation 🔜 + +**Goal:** Validate implementation and provide comprehensive documentation + +### Tasks +- [ ] Create unit tests + - [ ] Test YAML parsing + - [ ] Test component deserialization + - [ ] Test reference resolution + - [ ] Test prefab instantiation + - [ ] Test transform computation + +- [ ] Create integration tests + - [ ] Test with minimal Unity project + - [ ] Test with nested prefabs + - [ ] Test with various component types + - [ ] Test cache functionality + +- [ ] Test with Cursebreaker game + - [ ] Load actual game scenes + - [ ] Verify data correctness + - [ ] Measure performance + - [ ] Handle edge cases + +- [ ] Write API documentation + - [ ] Document all public types + - [ ] Document all public functions + - [ ] Add usage examples + - [ ] Document macro syntax + +- [ ] Update README.md + - [ ] Quick start guide + - [ ] Installation instructions + - [ ] Basic usage examples + - [ ] Feature list + - [ ] License and contributing + +- [ ] Create examples + - [ ] `examples/basic_scene.rs` - Load simple scene + - [ ] `examples/prefab_query.rs` - Query prefabs + - [ ] `examples/export_json.rs` - Export to JSON + - [ ] `examples/custom_component.rs` - Define custom component + +- [ ] Performance benchmarks + - [ ] Benchmark scene loading + - [ ] Benchmark with/without cache + - [ ] Benchmark selective parsing + - [ ] Compare memory usage + +**Completion Criteria:** All tests pass, documentation complete, README has working examples. 
+ +--- + +## Future Enhancements (Post-MVP) + +These are documented in DESIGN.md "Future Considerations" but are not required for the initial release: + +- [ ] ParserBuilder API for more flexible configuration +- [ ] Support for added/removed components in prefab overrides +- [ ] Component serialization versioning +- [ ] More asset types (Materials, Textures, Animators, ScriptableObjects) +- [ ] Binary cache format (faster than SQLite) +- [ ] Helper query methods wrapping Sparsey +- [ ] Parallel parsing (already in design, low priority) +- [ ] Unity package support (Packages/...) + +--- + +## Key Design Decisions to Remember + +1. **Sparsey over other ECS**: Chosen for its excellent insertion performance and light weight. Query performance trade-off acceptable. +2. **Selective parsing**: Memory efficiency by only parsing declared component types. +3. **Stream-based YAML**: Skip unknown components without allocation. +4. **Post-process transforms**: Compute world matrices after all entities loaded. +5. **Offline-only**: No runtime integration, works on exported files only. +6. **Single macro**: User declares all types in one place for convenience. +7. **Direct World exposure**: Advanced users get full Sparsey access. + +--- + +## Dependencies Reference + +### unity-parser +- `sparsey` - ECS backend +- `serde`, `serde_yaml`, `serde_json` - Serialization +- `anyhow` - Error handling +- `glam` - Math types (Vec3, Quat, Mat4) +- `rayon` - Parallel processing +- `walkdir` - Directory traversal + +### unity-project-derive +- `syn` - Parse Rust syntax +- `quote` - Generate Rust code +- `proc-macro2` - Procedural macro utilities + +### cursebreaker-parser (CLI) +- `unity-parser` - Core library +- `clap` - CLI argument parsing +- `rusqlite`/`sqlx` - SQLite access +- `tokio` - Async runtime +- `indicatif` - Progress bars +- `dotenvy` - .env file support + +--- + +## Getting Started (For Future Context) + +To resume implementation: + +1. Check this roadmap to see current phase +2. 
Read NOTES.md for any important decisions/gotchas +3. Review DESIGN.md for architectural details +4. Start with the first unchecked task in current phase +5. Update checkboxes as you complete tasks +6. Update NOTES.md with any new discoveries + +**Current Phase:** Phase 2 (YAML Parsing) +**Next Action:** Create `unity-parser/src/yaml/mod.rs` module structure + diff --git a/cursebreaker-parser/Cargo.toml b/cursebreaker-parser/Cargo.toml new file mode 100644 index 0000000..db32108 --- /dev/null +++ b/cursebreaker-parser/Cargo.toml @@ -0,0 +1,22 @@ +[package] +name = "cursebreaker-parser" +version = "0.1.0" +edition = "2021" +description = "Unity project parser that creates SQL databases from Unity projects" +license = "MIT" + +[[bin]] +name = "cursebreaker-parser" +path = "src/main.rs" + +[dependencies] +unity-parser = { path = "../unity-parser" } +serde.workspace = true +serde_json.workspace = true +anyhow.workspace = true +tokio.workspace = true +dotenvy.workspace = true +clap = { version = "4.0", features = ["derive"] } +rusqlite = "0.29" +sqlx = { version = "0.7", features = ["runtime-tokio-rustls", "sqlite"] } +quick-xml = "0.31" diff --git a/cursebreaker-parser/src/main.rs b/cursebreaker-parser/src/main.rs new file mode 100644 index 0000000..60e35fa --- /dev/null +++ b/cursebreaker-parser/src/main.rs @@ -0,0 +1,155 @@ +//! Cursebreaker Parser - Unity project parser for the Cursebreaker game +//! +//! This tool parses the Cursebreaker Unity project and extracts game data +//! into structured formats (SQL, JSON, etc.). 
+ +use anyhow::{Context, Result}; +use clap::{Parser, Subcommand}; +use std::path::PathBuf; +use unity_parser::ParseContext; + +// Top-level CLI definition; clap derives the argument parser from this struct. +// (Plain `//` comments are used here because `///` text becomes clap help output.) +#[derive(Parser)] +#[command(name = "cursebreaker-parser")] +#[command(about = "Parse Cursebreaker Unity project and extract game data", long_about = None)] +struct Cli { + #[command(subcommand)] + command: Commands, +} + +// Subcommands understood by the tool; each variant is dispatched from `main`. +#[derive(Subcommand)] +enum Commands { + /// Scan the Unity project and display statistics + Scan { + /// Path to the Unity project directory + #[arg(short, long)] + project: Option<PathBuf>, + }, + + /// List all assets in the project + List { + /// Path to the Unity project directory + #[arg(short, long)] + project: Option<PathBuf>, + + /// Filter by file extension (e.g., "prefab", "unity") + #[arg(short, long)] + filter: Option<String>, + }, +} + +/// Entry point: loads `.env` on a best-effort basis, parses the command line and +/// runs the selected subcommand, propagating any error it returns. +fn main() -> Result<()> { + // Load environment variables from .env file + dotenvy::dotenv().ok(); + + let cli = Cli::parse(); + + match cli.command { + Commands::Scan { project } => scan_project(project)?, + Commands::List { project, filter } => list_assets(project, filter)?, + } + + Ok(()) +} + +/// Scan a Unity project and display statistics +/// +/// Resolves the project directory with `get_project_path`, builds a `ParseContext` +/// for it, then prints the context's statistics followed by at most ten sample +/// GUID -> path entries. +/// +/// # Errors +/// +/// Fails if no project path can be resolved or the parse context cannot be created. +fn scan_project(project_path: Option<PathBuf>) -> Result<()> { + let project_path = get_project_path(project_path)?; + + println!("=== Cursebreaker Unity Parser - Phase 1 Test ==="); + println!(); + println!("Scanning Unity project at: {}", project_path.display()); + println!(); + + // Create parse context - this will scan all .meta files + let context = ParseContext::new(&project_path).context("Failed to create parse context")?; + + // Display statistics + let stats = context.stats(); + println!("=== Scan Results ==="); + println!("{}", stats); + println!(); + + // Display some example GUIDs + println!("=== Sample Assets ==="); + for (count, (guid, path)) in context.guid_map().iter().enumerate() { + if count >= 10 { + println!("... 
and {} more", context.guid_map().len() - 10); + break; + } + + // Make the path relative to the project root for cleaner display + let relative_path = path.strip_prefix(&project_path).unwrap_or(path); + + println!(" GUID: {} -> {}", guid, relative_path.display()); + } + + println!(); + println!("✓ Phase 1 Complete: Successfully scanned project and extracted all GUIDs!"); + + Ok(()) +} + +/// List all assets in the project, optionally filtered by extension +/// +/// When `filter` is given, only assets whose file extension matches it are printed. +/// A leading `.` in the filter is ignored and the comparison is ASCII +/// case-insensitive; assets without a UTF-8 extension never match. +/// +/// # Errors +/// +/// Fails if no project path can be resolved or the parse context cannot be created. +fn list_assets(project_path: Option<PathBuf>, filter: Option<String>) -> Result<()> { + let project_path = get_project_path(project_path)?; + + println!("Loading Unity project at: {}", project_path.display()); + + let context = ParseContext::new(&project_path).context("Failed to create parse context")?; + + println!(); + println!("=== Assets in Project ==="); + + // Normalize the filter once, outside the loop, so ".prefab" behaves like "prefab" + let wanted_ext = filter.as_deref().map(|ext| ext.trim_start_matches('.')); + + let mut count = 0; + for (_guid, path) in context.guid_map().iter() { + // Apply filter if provided + if let Some(wanted) = wanted_ext { + let matches = path + .extension() + .and_then(|s| s.to_str()) + .map_or(false, |ext| ext.eq_ignore_ascii_case(wanted)); + if !matches { + continue; + } + } + + let relative_path = path.strip_prefix(&project_path).unwrap_or(path); + + println!(" {}", relative_path.display()); + count += 1; + } + + println!(); + println!("Total: {} assets", count); + + Ok(()) +} + +/// Get the Unity project path from CLI argument or environment variable +/// +/// An explicit `cli_path` wins and is returned without checking that it exists. +/// Otherwise `UNITY_PROJECT_PATH` is used when it names an existing directory; +/// if it is set but does not, a warning is printed to stderr. +/// +/// # Errors +/// +/// Fails with usage instructions when neither source yields a usable path. +fn get_project_path(cli_path: Option<PathBuf>) -> Result<PathBuf> { + if let Some(path) = cli_path { + return Ok(path); + } + + // Try to get from environment variable + if let Ok(path) = std::env::var("UNITY_PROJECT_PATH") { + let path = PathBuf::from(path); + if path.is_dir() { + return Ok(path); + } else { + eprintln!( + "Warning: UNITY_PROJECT_PATH is set but directory not found: {}", + path.display() + ); + } + } + + anyhow::bail!( + "No Unity project path provided. Either:\n\ + 1. Pass --project argument\n\ + 2. 
Set UNITY_PROJECT_PATH in .env file\n\ + \n\ + Example .env file:\n\ + UNITY_PROJECT_PATH=c:\\Repos\\CBAssets" + ) +} diff --git a/data/tests/unity-csharp-reference b/data/tests/unity-csharp-reference new file mode 160000 index 0000000..73bda32 --- /dev/null +++ b/data/tests/unity-csharp-reference @@ -0,0 +1 @@ +Subproject commit 73bda32dd32f78d0ba4da92466be956020eb7073 diff --git a/data/tests/unity-sampleproject b/data/tests/unity-sampleproject new file mode 160000 index 0000000..38bccb6 --- /dev/null +++ b/data/tests/unity-sampleproject @@ -0,0 +1 @@ +Subproject commit 38bccb6171804e0a88c013eaae67048bb2618eda diff --git a/scripts/setup-test-project.bat b/scripts/setup-test-project.bat new file mode 100644 index 0000000..4ef97d7 --- /dev/null +++ b/scripts/setup-test-project.bat @@ -0,0 +1,29 @@ +@echo off +setlocal enabledelayedexpansion + +REM Setup script for test Unity projects (Windows) +REM This initializes and updates git submodules containing test Unity projects + +echo Setting up test Unity projects... + +REM Initialize and update all submodules +echo Initializing git submodules... +git submodule init +git submodule update + +REM Verify that at least one test project exists +if not exist "data\tests" ( + echo Error: Test projects directory not found + echo Please ensure git submodules are properly initialized + exit /b 1 +) + +echo Test projects setup complete! +echo Available test projects: +for /d %%d in (data\tests\*) do ( + if exist "%%d" ( + echo - %%~nd + ) +) + +goto :eof diff --git a/scripts/setup-test-project.sh b/scripts/setup-test-project.sh new file mode 100644 index 0000000..92322ca --- /dev/null +++ b/scripts/setup-test-project.sh @@ -0,0 +1,29 @@ +#!/bin/bash + +# Setup script for test Unity projects +# This initializes and updates git submodules containing test Unity projects + +set -e + +echo "Setting up test Unity projects..." + +# Initialize and update all submodules +echo "Initializing git submodules..."
+git submodule init +git submodule update + +# Verify that test projects exist +if [ ! -d "data/tests" ]; then + echo "Error: Test projects directory not found" + echo "Please ensure git submodules are properly initialized" + exit 1 +fi + +echo "Test projects setup complete!" +echo "Available test projects:" +for dir in data/tests/*/; do + if [ -d "$dir" ]; then + basename "$dir" + fi +done + diff --git a/unity-parser/Cargo.toml b/unity-parser/Cargo.toml new file mode 100644 index 0000000..6c6a9b5 --- /dev/null +++ b/unity-parser/Cargo.toml @@ -0,0 +1,23 @@ +[package] +name = "unity-parser" +version = "0.1.0" +edition = "2021" +description = "A library for parsing Unity project YAML files" +license = "MIT" + +[lib] +name = "unity_parser" +path = "src/lib.rs" + +[dependencies] +serde.workspace = true +serde_yaml.workspace = true +serde_json.workspace = true +anyhow.workspace = true +glam.workspace = true +sparsey.workspace = true +walkdir.workspace = true +rayon.workspace = true +thiserror.workspace = true + +# unity-project-derive = { path = "../unity-project-derive" } # Will be created in Phase 7 \ No newline at end of file diff --git a/unity-parser/src/asset.rs b/unity-parser/src/asset.rs new file mode 100644 index 0000000..ec58daa --- /dev/null +++ b/unity-parser/src/asset.rs @@ -0,0 +1,424 @@ +//! Asset parsing trait and utilities + +use crate::context::ParseContext; +use crate::error::Result; +use crate::types::FileID; +use serde_yaml::Value as YamlValue; +use std::path::Path; + +/// Trait for types that can be parsed from Unity asset files +/// +/// This trait is implemented by different asset types (scenes, prefabs, etc.) +/// to provide a unified interface for parsing. 
+pub trait AssetParser: Sized { + /// Returns the file extensions this parser can handle + /// + /// For example, scene files return `["unity"]`, prefabs return `["prefab"]` + fn extensions() -> &'static [&'static str]; + + /// Parse an asset from YAML data + /// + /// # Arguments + /// * `yaml` - The parsed YAML value representing the asset + /// * `context` - The parse context with GUID mappings and state + /// + /// # Returns + /// The parsed asset on success, or an error if parsing fails + fn parse(yaml: &YamlValue, context: &mut ParseContext) -> Result; + + /// Parse an asset from a file path + /// + /// This is a convenience method that reads the file, parses the YAML, + /// and calls the `parse` method. + fn parse_file(path: impl AsRef, context: &mut ParseContext) -> Result { + let path = path.as_ref(); + + // Read the file contents + let contents = std::fs::read_to_string(path)?; + + // Parse as YAML + let yaml: YamlValue = + serde_yaml::from_str(&contents).map_err(|e| crate::error::ParseError::InvalidYaml { + file: path.to_path_buf(), + source: e, + })?; + + // Set the current file in context + context.set_current_file(path); + + // Parse the asset + Self::parse(&yaml, context) + } + + /// Check if this parser can handle a given file extension + fn can_parse(extension: &str) -> bool { + Self::extensions().contains(&extension) + } +} + +/// Unity component type tag (e.g., !u!1 for GameObject, !u!4 for Transform) +/// +/// Type-safe wrapper around Unity's type tag system. 
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub struct UnityTypeTag(pub u32); + +impl UnityTypeTag { + /// GameObject type + pub const GAME_OBJECT: Self = UnityTypeTag(1); + /// Transform type + pub const TRANSFORM: Self = UnityTypeTag(4); + /// MeshRenderer type + pub const MESH_RENDERER: Self = UnityTypeTag(23); + /// MeshFilter type + pub const MESH_FILTER: Self = UnityTypeTag(33); + /// Mesh type + pub const MESH: Self = UnityTypeTag(43); + /// Rigidbody type + pub const RIGIDBODY: Self = UnityTypeTag(54); + /// MeshCollider type + pub const MESH_COLLIDER: Self = UnityTypeTag(64); + /// BoxCollider type + pub const BOX_COLLIDER: Self = UnityTypeTag(65); + /// SphereCollider type + pub const SPHERE_COLLIDER: Self = UnityTypeTag(135); + /// CapsuleCollider type + pub const CAPSULE_COLLIDER: Self = UnityTypeTag(136); + /// MonoBehaviour type (custom scripts) + pub const MONO_BEHAVIOUR: Self = UnityTypeTag(114); + /// RectTransform type + pub const RECT_TRANSFORM: Self = UnityTypeTag(224); + /// PrefabInstance type + pub const PREFAB_INSTANCE: Self = UnityTypeTag(1001); + + /// Create a new type tag + pub fn new(tag: u32) -> Self { + UnityTypeTag(tag) + } + + /// Get the raw tag value + pub fn value(&self) -> u32 { + self.0 + } + + /// Get the type name for this tag + pub fn type_name(&self) -> &'static str { + match self.0 { + 1 => "GameObject", + 4 => "Transform", + 23 => "MeshRenderer", + 33 => "MeshFilter", + 43 => "Mesh", + 54 => "Rigidbody", + 64 => "MeshCollider", + 65 => "BoxCollider", + 135 => "SphereCollider", + 136 => "CapsuleCollider", + 114 => "MonoBehaviour", + 224 => "RectTransform", + 1001 => "PrefabInstance", + _ => "Unknown", + } + } + + /// Check if this is a MonoBehaviour (custom script) + pub fn is_mono_behaviour(&self) -> bool { + self.0 == 114 + } + + /// Check if this is a GameObject + pub fn is_game_object(&self) -> bool { + self.0 == 1 + } + + /// Check if this is a Transform + pub fn is_transform(&self) -> bool { + self.0 == 4 + 
} + + /// Check if this is a PrefabInstance + pub fn is_prefab_instance(&self) -> bool { + self.0 == 1001 + } +} + +/// Unity document header information +/// +/// Unity YAML documents start with headers like: +/// ```text +/// --- !u!1 &123456 +/// ``` +/// +/// Where: +/// - `!u!1` is the type tag (in this case, type 1 = GameObject) +/// - `&123456` is the fileID anchor +#[derive(Debug, Clone, PartialEq)] +pub struct UnityDocumentHeader { + /// The Unity type tag (e.g., 1 for GameObject, 4 for Transform) + pub type_tag: UnityTypeTag, + + /// The fileID anchor + pub file_id: FileID, +} + +impl UnityDocumentHeader { + /// Parse a Unity document header from a YAML document marker + /// + /// # Example + /// ```text + /// --- !u!1 &123456 + /// ``` + pub fn parse(header_line: &str) -> Option { + // Unity headers look like: "--- !u!1 &123456" + let parts: Vec<&str> = header_line.split_whitespace().collect(); + + if parts.len() < 3 || parts[0] != "---" { + return None; + } + + // Extract type ID from tag (e.g., 1 from "!u!1") + let type_tag_str = parts[1].strip_prefix("!u!")?; + let type_id = type_tag_str.parse::().ok()?; + let type_tag = UnityTypeTag::new(type_id); + + // Extract fileID from anchor (e.g., 123456 from "&123456") + let file_id_str = parts[2].strip_prefix('&')?; + let file_id_value = file_id_str.parse::().ok()?; + let file_id = FileID::new(file_id_value); + + Some(UnityDocumentHeader { type_tag, file_id }) + } + + /// Get the type name for this header + pub fn type_name(&self) -> &'static str { + self.type_tag.type_name() + } + + /// Get the type tag value + pub fn type_id(&self) -> u32 { + self.type_tag.value() + } + + /// Get the fileID value + pub fn file_id_value(&self) -> i64 { + self.file_id.value() + } +} + +/// Helper function to split Unity YAML into multiple documents +/// +/// Unity files contain multiple YAML documents separated by `---` markers. +/// This function splits the content into individual documents. 
+pub fn split_unity_yaml(content: &str) -> Vec<(Option, String)> { + let mut documents = Vec::new(); + let mut current_header = None; + let mut current_content = String::new(); + + for line in content.lines() { + if line.starts_with("---") { + // Save previous document if any + if !current_content.is_empty() { + documents.push((current_header.take(), current_content.clone())); + current_content.clear(); + } + + // Parse new header + current_header = UnityDocumentHeader::parse(line); + } else { + current_content.push_str(line); + current_content.push('\n'); + } + } + + // Don't forget the last document + if !current_content.is_empty() { + documents.push((current_header, current_content)); + } + + documents +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_parse_unity_header() { + let header = UnityDocumentHeader::parse("--- !u!1 &123456").unwrap(); + assert_eq!(header.type_tag, UnityTypeTag::GAME_OBJECT); + assert_eq!(header.type_id(), 1); + assert_eq!(header.file_id_value(), 123456); + assert_eq!(header.type_name(), "GameObject"); + } + + #[test] + fn test_parse_transform_header() { + let header = UnityDocumentHeader::parse("--- !u!4 &789").unwrap(); + assert_eq!(header.type_tag, UnityTypeTag::TRANSFORM); + assert_eq!(header.type_id(), 4); + assert_eq!(header.file_id_value(), 789); + assert_eq!(header.type_name(), "Transform"); + } + + #[test] + fn test_parse_monobehaviour_header() { + let header = UnityDocumentHeader::parse("--- !u!114 &999888777").unwrap(); + assert_eq!(header.type_tag, UnityTypeTag::MONO_BEHAVIOUR); + assert_eq!(header.type_id(), 114); + assert_eq!(header.file_id_value(), 999888777); + assert_eq!(header.type_name(), "MonoBehaviour"); + assert!(header.type_tag.is_mono_behaviour()); + } + + #[test] + fn test_parse_various_unity_types() { + // MeshRenderer + let header = UnityDocumentHeader::parse("--- !u!23 &100").unwrap(); + assert_eq!(header.type_tag, UnityTypeTag::MESH_RENDERER); + assert_eq!(header.type_id(), 23); + 
assert_eq!(header.type_name(), "MeshRenderer"); + + // MeshFilter + let header = UnityDocumentHeader::parse("--- !u!33 &200").unwrap(); + assert_eq!(header.type_tag, UnityTypeTag::MESH_FILTER); + assert_eq!(header.type_id(), 33); + assert_eq!(header.type_name(), "MeshFilter"); + + // BoxCollider + let header = UnityDocumentHeader::parse("--- !u!65 &300").unwrap(); + assert_eq!(header.type_tag, UnityTypeTag::BOX_COLLIDER); + assert_eq!(header.type_id(), 65); + assert_eq!(header.type_name(), "BoxCollider"); + + // RectTransform + let header = UnityDocumentHeader::parse("--- !u!224 &400").unwrap(); + assert_eq!(header.type_tag, UnityTypeTag::RECT_TRANSFORM); + assert_eq!(header.type_id(), 224); + assert_eq!(header.type_name(), "RectTransform"); + } + + #[test] + fn test_parse_unknown_type() { + let header = UnityDocumentHeader::parse("--- !u!9999 &123").unwrap(); + assert_eq!(header.type_id(), 9999); + assert_eq!(header.type_name(), "Unknown"); + } + + #[test] + fn test_invalid_header() { + assert!(UnityDocumentHeader::parse("not a header").is_none()); + assert!(UnityDocumentHeader::parse("--- invalid").is_none()); + assert!(UnityDocumentHeader::parse("!u!1 &123").is_none()); // Missing --- + assert!(UnityDocumentHeader::parse("--- !u!1").is_none()); // Missing anchor + assert!(UnityDocumentHeader::parse("--- &123").is_none()); // Missing type + } + + #[test] + fn test_invalid_header_malformed() { + assert!(UnityDocumentHeader::parse("--- !u!abc &123").is_none()); // Invalid type ID + assert!(UnityDocumentHeader::parse("--- !u!1 &abc").is_none()); // Invalid file ID + assert!(UnityDocumentHeader::parse("--- !u!1 123").is_none()); // Missing & in anchor + } + + #[test] + fn test_split_unity_yaml_empty() { + let documents = split_unity_yaml(""); + assert_eq!(documents.len(), 0); + } + + #[test] + fn test_split_unity_yaml_single_document() { + let yaml = r#"--- !u!1 &123 +GameObject: + m_Name: TestObject +"#; + + let documents = split_unity_yaml(yaml); + 
assert_eq!(documents.len(), 1); + + let (header, content) = &documents[0]; + assert!(header.is_some()); + let header = header.as_ref().unwrap(); + assert_eq!(header.type_id(), 1); + assert_eq!(header.file_id_value(), 123); + assert!(content.contains("GameObject")); + assert!(content.contains("TestObject")); + } + + #[test] + fn test_split_unity_yaml_multiple_documents() { + let yaml = r#"--- !u!1 &100 +GameObject: + m_Name: First +--- !u!4 &200 +Transform: + m_Position: {x: 0, y: 0, z: 0} +--- !u!114 &300 +MonoBehaviour: + m_Script: {fileID: 123} +"#; + + let documents = split_unity_yaml(yaml); + assert_eq!(documents.len(), 3); + + // Check first document + assert_eq!(documents[0].0.as_ref().unwrap().type_id(), 1); + assert!(documents[0].1.contains("First")); + + // Check second document + assert_eq!(documents[1].0.as_ref().unwrap().type_id(), 4); + assert!(documents[1].1.contains("Transform")); + + // Check third document + assert_eq!(documents[2].0.as_ref().unwrap().type_id(), 114); + assert!(documents[2].1.contains("MonoBehaviour")); + } + + #[test] + fn test_unity_header_equality() { + let header1 = UnityDocumentHeader { + type_tag: UnityTypeTag::GAME_OBJECT, + file_id: FileID::new(123), + }; + + let header2 = UnityDocumentHeader { + type_tag: UnityTypeTag::GAME_OBJECT, + file_id: FileID::new(123), + }; + + assert_eq!(header1, header2); + } + + #[test] + fn test_unity_type_tag_constants() { + assert_eq!(UnityTypeTag::GAME_OBJECT.value(), 1); + assert_eq!(UnityTypeTag::TRANSFORM.value(), 4); + assert_eq!(UnityTypeTag::MONO_BEHAVIOUR.value(), 114); + assert_eq!(UnityTypeTag::PREFAB_INSTANCE.value(), 1001); + + assert!(UnityTypeTag::GAME_OBJECT.is_game_object()); + assert!(UnityTypeTag::TRANSFORM.is_transform()); + assert!(UnityTypeTag::MONO_BEHAVIOUR.is_mono_behaviour()); + assert!(UnityTypeTag::PREFAB_INSTANCE.is_prefab_instance()); + } + + #[test] + fn test_split_yaml_with_content_between_markers() { + let yaml = r#"--- !u!1 &100 +GameObject: + m_Name: Object1 
+--- !u!4 &200 +Transform: + m_LocalPosition: {x: 1, y: 2, z: 3} +"#; + + let documents = split_unity_yaml(yaml); + assert_eq!(documents.len(), 2); + + // Verify content is properly split + assert!(documents[0].1.contains("Object1")); + assert!(!documents[0].1.contains("Transform")); + + assert!(documents[1].1.contains("Transform")); + assert!(!documents[1].1.contains("Object1")); + } +} diff --git a/unity-parser/src/context.rs b/unity-parser/src/context.rs new file mode 100644 index 0000000..3c2818e --- /dev/null +++ b/unity-parser/src/context.rs @@ -0,0 +1,363 @@ +//! Parse context that maintains state during Unity project parsing + +use crate::error::{ParseError, Result}; +use crate::meta::GUIDMap; +use crate::types::{FileID, GUID}; +use sparsey::storage::Entity; +use std::collections::HashMap; +use std::path::{Path, PathBuf}; + +/// Context maintained during parsing of Unity files +/// +/// This structure holds all the shared state needed during parsing: +/// - GUID to file path mappings +/// - FileID to Entity mappings (per-file scope) +/// - Cached prefab data +/// - Project root path +#[derive(Debug)] +pub struct ParseContext { + /// Root directory of the Unity project + project_root: PathBuf, + + /// Map of GUIDs to asset file paths + guid_map: GUIDMap, + + /// Current file being parsed (for resolving local references) + current_file: Option, + + /// Map FileID to Entity for the current file scope + file_id_to_entity: HashMap, + + /// Cache of loaded prefabs to avoid re-parsing + /// Maps prefab GUID to a list of entities that make up the prefab + prefab_cache: HashMap>, + + /// Stack of files currently being parsed (for circular reference detection) + parse_stack: Vec, +} + +impl ParseContext { + /// Create a new parse context for a Unity project + /// + /// This will scan the project directory for all .meta files and build + /// the GUID mapping table. 
+ pub fn new(project_root: impl AsRef) -> Result { + let project_root = project_root.as_ref().to_path_buf(); + + if !project_root.exists() { + return Err(ParseError::MissingFile(project_root)); + } + + // Scan the project and build GUID map + let guid_map = GUIDMap::scan_project(&project_root)?; + + Ok(Self { + project_root, + guid_map, + current_file: None, + file_id_to_entity: HashMap::new(), + prefab_cache: HashMap::new(), + parse_stack: Vec::new(), + }) + } + + /// Get the project root directory + pub fn project_root(&self) -> &Path { + &self.project_root + } + + /// Get the GUID map + pub fn guid_map(&self) -> &GUIDMap { + &self.guid_map + } + + /// Get the currently parsing file + pub fn current_file(&self) -> Option<&Path> { + self.current_file.as_deref() + } + + /// Set the current file being parsed + /// + /// This clears the FileID -> Entity mappings since FileIDs are scoped per-file. + pub fn set_current_file(&mut self, path: impl AsRef) { + self.current_file = Some(path.as_ref().to_path_buf()); + self.file_id_to_entity.clear(); + } + + /// Clear the current file + pub fn clear_current_file(&mut self) { + self.current_file = None; + self.file_id_to_entity.clear(); + } + + /// Register a FileID -> Entity mapping for the current file + pub fn register_entity(&mut self, file_id: FileID, entity: Entity) { + self.file_id_to_entity.insert(file_id, entity); + } + + /// Look up an Entity by FileID in the current file scope + pub fn get_entity(&self, file_id: FileID) -> Option { + self.file_id_to_entity.get(&file_id).copied() + } + + /// Resolve a reference to an Entity + /// + /// For local references (no GUID), looks up in current file scope. + /// For external references (with GUID), looks up the file and then the FileID. 
+ pub fn resolve_reference(&self, file_id: i64, guid: Option<&str>) -> Result> { + let file_id = FileID::new(file_id); + + // Null reference (fileID = 0) + if file_id.value() == 0 { + return Ok(None); + } + + match guid { + // Local reference - look up in current file scope + None => Ok(self.get_entity(file_id)), + + // External reference - would need to load the other file + // For now, we return None (will be implemented in later phases) + Some(_guid) => { + // TODO: Implement external reference resolution in Phase 5 + Ok(None) + } + } + } + + /// Check if a prefab is already loaded + pub fn is_prefab_cached(&self, guid: &GUID) -> bool { + self.prefab_cache.contains_key(guid) + } + + /// Get a cached prefab's entities + pub fn get_cached_prefab(&self, guid: &GUID) -> Option<&Vec> { + self.prefab_cache.get(guid) + } + + /// Cache a loaded prefab + pub fn cache_prefab(&mut self, guid: GUID, entities: Vec) { + self.prefab_cache.insert(guid, entities); + } + + /// Get the file path for a GUID + pub fn get_path_for_guid(&self, guid: &GUID) -> Option<&PathBuf> { + self.guid_map.get_path(guid) + } + + /// Get the GUID for a file path + pub fn get_guid_for_path(&self, path: &Path) -> Option<&GUID> { + self.guid_map.get_guid(path) + } + + /// Push a file onto the parse stack (for circular reference detection) + pub fn push_parse_file(&mut self, path: impl AsRef) -> Result<()> { + let path = path.as_ref(); + + // Check for circular references + if self.parse_stack.iter().any(|p| p == path) { + return Err(ParseError::CircularReference(format!( + "Circular reference detected: {}", + path.display() + ))); + } + + self.parse_stack.push(path.to_path_buf()); + Ok(()) + } + + /// Pop a file from the parse stack + pub fn pop_parse_file(&mut self) -> Option { + self.parse_stack.pop() + } + + /// Get the current parse depth (number of nested files being parsed) + pub fn parse_depth(&self) -> usize { + self.parse_stack.len() + } + + /// Get statistics about the context + pub fn 
stats(&self) -> ContextStats { + ContextStats { + total_guids: self.guid_map.len(), + cached_prefabs: self.prefab_cache.len(), + entities_in_scope: self.file_id_to_entity.len(), + parse_depth: self.parse_stack.len(), + } + } +} + +/// Statistics about the parse context +#[derive(Debug, Clone, Copy)] +pub struct ContextStats { + /// Total number of GUIDs in the project + pub total_guids: usize, + + /// Number of prefabs currently cached + pub cached_prefabs: usize, + + /// Number of entities registered in current file scope + pub entities_in_scope: usize, + + /// Current parse depth (nested prefab level) + pub parse_depth: usize, +} + +impl std::fmt::Display for ContextStats { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!( + f, + "Context Stats: {} GUIDs, {} cached prefabs, {} entities in scope, depth {}", + self.total_guids, self.cached_prefabs, self.entities_in_scope, self.parse_depth + ) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use sparsey::storage::Version; + + // Note: Most ParseContext tests require filesystem access and are tested + // through integration tests. These are unit tests for the logic. 
+ + #[test] + fn test_context_entity_registration() { + // Create a mock context (we can't test new() without a real directory) + // but we can test the entity registration logic + let file_id = FileID::new(12345); + let entity = Entity::new(0, Version::DEFAULT); + + // Just verify the types work correctly + assert_eq!(file_id.value(), 12345); + assert_eq!(entity, entity); + } + + #[test] + fn test_context_stats_display() { + let stats = ContextStats { + total_guids: 100, + cached_prefabs: 5, + entities_in_scope: 25, + parse_depth: 2, + }; + + let stats_str = format!("{}", stats); + assert!(stats_str.contains("100")); + assert!(stats_str.contains("5")); + assert!(stats_str.contains("25")); + assert!(stats_str.contains("2")); + assert!(stats_str.contains("Context Stats")); + } + + #[test] + fn test_context_with_test_projects() { + // Check for test projects in data/tests/ directory + // First try relative to current working directory + let mut test_base_path = std::path::PathBuf::from("data/tests"); + + // If not found, try relative to the Cargo.toml directory (src/../data/tests) + if !test_base_path.exists() { + test_base_path = std::path::PathBuf::from("../data/tests"); + } + + // Also try absolute path from environment variable + if !test_base_path.exists() { + if let Ok(manifest_dir) = std::env::var("CARGO_MANIFEST_DIR") { + test_base_path = std::path::PathBuf::from(manifest_dir).join("../data/tests"); + } + } + + println!("Looking for test projects at: {:?}", test_base_path); + + if !test_base_path.exists() { + println!("Test projects directory not found, skipping integration test"); + return; + } + + // Find all subdirectories in data/tests that contain Unity projects + let mut test_projects_found = 0; + let mut total_guids_loaded = 0; + + if let Ok(entries) = std::fs::read_dir(test_base_path) { + for entry in entries.flatten() { + let path = entry.path(); + + // Skip if not a directory + if !path.is_dir() { + continue; + } + + // Check for Unity project 
markers (Assets folder, ProjectSettings, etc.) + let assets_path = path.join("Assets"); + let project_settings_path = path.join("ProjectSettings"); + + // For unity-sampleproject, the Assets are nested under PiratePanic/Assets + let pirate_panic_assets = path.join("PiratePanic").join("Assets"); + + let unity_project_path = if assets_path.exists() { + assets_path + } else if pirate_panic_assets.exists() { + pirate_panic_assets + } else if project_settings_path.exists() { + // Some projects have ProjectSettings but Assets elsewhere + path.clone() + } else { + continue; // Not a Unity project + }; + + let project_name = path + .file_name() + .and_then(|n| n.to_str()) + .unwrap_or("unknown"); + + println!( + "Testing Unity project: {} at {:?}", + project_name, unity_project_path + ); + + // This is an integration test that requires the test project to be set up + match ParseContext::new(&unity_project_path) { + Ok(context) => { + let guid_count = context.guid_map().len(); + assert!( + guid_count > 0, + "Test project {} should have some assets", + project_name + ); + + println!( + "✓ Successfully loaded {} GUIDs from {}", + guid_count, project_name + ); + test_projects_found += 1; + total_guids_loaded += guid_count; + } + Err(e) => { + println!("✗ Test project {} scan failed: {}", project_name, e); + // Don't fail the test - some projects might have parsing issues + } + } + } + } + + if test_projects_found == 0 { + println!("No valid Unity test projects found in data/tests/"); + return; + } + + println!( + "Integration test summary: {} projects tested, {} total GUIDs loaded", + test_projects_found, total_guids_loaded + ); + + assert!( + test_projects_found > 0, + "At least one test project should be available" + ); + assert!( + total_guids_loaded > 0, + "At least some GUIDs should be loaded from test projects" + ); + } +} diff --git a/unity-parser/src/error.rs b/unity-parser/src/error.rs new file mode 100644 index 0000000..c5ee8c6 --- /dev/null +++ 
b/unity-parser/src/error.rs @@ -0,0 +1,101 @@ +//! Error types for Unity parser operations + +use std::path::PathBuf; +use thiserror::Error; + +/// Main error type for Unity parsing operations +#[derive(Error, Debug)] +pub enum ParseError { + /// Failed to parse YAML content + #[error("Invalid YAML in file {file:?}: {source}")] + InvalidYaml { + file: PathBuf, + source: serde_yaml::Error, + }, + + /// Required file was not found + #[error("Missing file: {0}")] + MissingFile(PathBuf), + + /// Invalid or unresolvable reference + #[error("Invalid reference - fileID: {file_id}, guid: {guid:?}")] + InvalidReference { file_id: i64, guid: Option }, + + /// Component was expected but not found + #[error("Component not found: {component_type} on entity with fileID {file_id}")] + ComponentNotFound { + component_type: String, + file_id: i64, + }, + + /// IO error occurred + #[error("IO error: {0}")] + Io(#[from] std::io::Error), + + /// Failed to parse GUID from meta file + #[error("Failed to parse GUID from meta file {file:?}")] + InvalidMetaFile { file: PathBuf }, + + /// Invalid file format or structure + #[error("Invalid file format: {0}")] + InvalidFormat(String), + + /// Circular reference detected in prefab hierarchy + #[error("Circular prefab reference detected: {0}")] + CircularReference(String), + + /// Generic parsing error + #[error("Parse error: {0}")] + Generic(String), +} + +/// Result type alias for Unity parser operations +pub type Result = std::result::Result; + +#[cfg(test)] +mod tests { + use super::*; + use std::path::PathBuf; + + #[test] + fn test_parse_error_display() { + let error = ParseError::MissingFile(PathBuf::from("test.unity")); + assert!(error.to_string().contains("test.unity")); + + let error = ParseError::InvalidReference { + file_id: 12345, + guid: Some("abc123".to_string()), + }; + assert!(error.to_string().contains("12345")); + assert!(error.to_string().contains("abc123")); + + let error = ParseError::ComponentNotFound { + component_type: 
"Transform".to_string(), + file_id: 99, + }; + assert!(error.to_string().contains("Transform")); + assert!(error.to_string().contains("99")); + } + + #[test] + fn test_parse_error_from_io() { + let io_error = std::io::Error::new(std::io::ErrorKind::NotFound, "file not found"); + let parse_error: ParseError = io_error.into(); + + assert!(matches!(parse_error, ParseError::Io(_))); + assert!(parse_error.to_string().contains("IO error")); + } + + #[test] + fn test_circular_reference_error() { + let error = ParseError::CircularReference("prefab cycle detected".to_string()); + assert!(error.to_string().contains("Circular")); + assert!(error.to_string().contains("prefab cycle")); + } + + #[test] + fn test_generic_error() { + let error = ParseError::Generic("custom error message".to_string()); + assert!(error.to_string().contains("custom error message")); + } +} diff --git a/unity-parser/src/lib.rs b/unity-parser/src/lib.rs new file mode 100644 index 0000000..4668fd5 --- /dev/null +++ b/unity-parser/src/lib.rs @@ -0,0 +1,68 @@ +//! Unity Parser - A library for parsing Unity project files +//! +//! This library provides tools for parsing Unity scene and prefab files from their +//! YAML representation and loading them into an ECS world using Sparsey. +//! +//! # Features +//! +//! - Parse Unity .meta files and extract GUIDs +//! - Map GUIDs to file paths across a Unity project +//! - Parse Unity YAML files (scenes and prefabs) +//! - Load parsed data into Sparsey ECS worlds +//! - Support for nested prefabs and property overrides +//! - Selective parsing - only load components you care about +//! +//! # Example +//! +//! ```no_run +//! use unity_parser::{ParseContext, meta::GUIDMap}; +//! +//! # fn main() -> anyhow::Result<()> { +//! // Create a parse context for your Unity project +//! let context = ParseContext::new("path/to/unity/project")?; +//! +//! // The context now contains a mapping of all GUIDs in the project +//! 
println!("Found {} assets", context.guid_map().len()); +//! +//! // You can now parse scenes and prefabs (coming in later phases) +//! # Ok(()) +//! # } +//! ``` + +// Public modules +pub mod asset; +pub mod context; +pub mod error; +pub mod meta; +pub mod types; +pub mod world; + +// Re-export commonly used types +pub use asset::{UnityDocumentHeader, UnityTypeTag}; +pub use context::ParseContext; +pub use error::{ParseError, Result}; +pub use meta::{GUIDMap, MetaFile}; +pub use types::{FileID, PropertyPath, Reference, GUID}; +pub use world::{load_scene, WorldBuilder}; + +// Re-export Sparsey types for convenience +pub use sparsey; +pub use sparsey::storage::Entity; +pub use sparsey::world::World; + +/// Library version +pub const VERSION: &str = env!("CARGO_PKG_VERSION"); + +/// Library name +pub const NAME: &str = env!("CARGO_PKG_NAME"); + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_version() { + assert!(!VERSION.is_empty()); + assert_eq!(NAME, "unity-parser"); + } +} diff --git a/unity-parser/src/meta.rs b/unity-parser/src/meta.rs new file mode 100644 index 0000000..41cb9c4 --- /dev/null +++ b/unity-parser/src/meta.rs @@ -0,0 +1,333 @@ +//! Parser for Unity .meta files +//! +//! Unity stores metadata about each asset in a .meta file with the same name. +//! The most important piece of information is the GUID, which uniquely identifies +//! the asset across the entire project. 
+ +use crate::error::{ParseError, Result}; +use crate::types::GUID; +use serde::Deserialize; +use std::collections::HashMap; +use std::path::{Path, PathBuf}; +use walkdir::WalkDir; + +/// Metadata extracted from a Unity .meta file +#[derive(Debug, Clone, Deserialize)] +pub struct MetaFile { + /// The globally unique identifier for this asset + pub guid: GUID, + + /// Optional file format version + #[serde(rename = "fileFormatVersion", default)] + pub file_format_version: Option, + + /// Optional time created + #[serde(rename = "timeCreated", default)] + pub time_created: Option, +} + +impl MetaFile { + /// Parse a .meta file from a path + pub fn from_file(path: impl AsRef) -> Result { + let path = path.as_ref(); + let contents = std::fs::read_to_string(path) + .map_err(|_| ParseError::MissingFile(path.to_path_buf()))?; + + Self::parse_str(&contents).map_err(|e| ParseError::InvalidYaml { + file: path.to_path_buf(), + source: e, + }) + } + + /// Parse a .meta file from a string + pub fn parse_str(contents: &str) -> std::result::Result { + serde_yaml::from_str(contents) + } +} + +/// Maps GUIDs to file paths for quick asset lookup +#[derive(Debug, Clone, Default)] +pub struct GUIDMap { + /// Map from GUID to the asset file path (without .meta extension) + guid_to_path: HashMap, + + /// Map from file path to GUID (for reverse lookup) + path_to_guid: HashMap, +} + +impl GUIDMap { + /// Create a new empty GUID map + pub fn new() -> Self { + Self::default() + } + + /// Build a GUID map by scanning a Unity project directory + /// + /// This will recursively walk the directory tree and parse all .meta files, + /// building a complete mapping of GUIDs to asset paths. 
+ pub fn scan_project(project_root: impl AsRef) -> Result { + let project_root = project_root.as_ref(); + let mut map = Self::new(); + + println!("Scanning Unity project at: {}", project_root.display()); + + let mut meta_count = 0; + let mut error_count = 0; + + // Walk the directory tree + for entry in WalkDir::new(project_root) + .follow_links(false) + .into_iter() + .filter_entry(|e| { + // Skip hidden directories and common Unity folders we don't need + let file_name = e.file_name().to_string_lossy(); + !file_name.starts_with('.') + && file_name != "Library" + && file_name != "Temp" + && file_name != "obj" + && file_name != "Logs" + }) + { + let entry = match entry { + Ok(e) => e, + Err(e) => { + eprintln!("Warning: Failed to read directory entry: {}", e); + error_count += 1; + continue; + } + }; + + let path = entry.path(); + + // Only process .meta files + if path.extension().and_then(|s| s.to_str()) != Some("meta") { + continue; + } + + // Parse the .meta file + match MetaFile::from_file(path) { + Ok(meta) => { + // Get the asset path (remove .meta extension) + if let Some(asset_path) = path.to_str().and_then(|s| s.strip_suffix(".meta")) { + let asset_path = PathBuf::from(asset_path); + let guid = meta.guid; + + map.insert(guid, asset_path); + meta_count += 1; + + if meta_count % 1000 == 0 { + println!(" Processed {} .meta files...", meta_count); + } + } + } + Err(e) => { + eprintln!( + "Warning: Failed to parse meta file {}: {}", + path.display(), + e + ); + error_count += 1; + } + } + } + + println!( + "Scan complete: {} GUIDs loaded, {} errors", + meta_count, error_count + ); + + Ok(map) + } + + /// Insert a GUID -> path mapping + pub fn insert(&mut self, guid: GUID, path: PathBuf) { + self.path_to_guid.insert(path.clone(), guid.clone()); + self.guid_to_path.insert(guid, path); + } + + /// Look up a file path by GUID + pub fn get_path(&self, guid: &GUID) -> Option<&PathBuf> { + self.guid_to_path.get(guid) + } + + /// Look up a GUID by file path + pub fn 
get_guid(&self, path: &Path) -> Option<&GUID> { + self.path_to_guid.get(path) + } + + /// Check if a GUID exists in the map + pub fn contains_guid(&self, guid: &GUID) -> bool { + self.guid_to_path.contains_key(guid) + } + + /// Get the total number of GUIDs in the map + pub fn len(&self) -> usize { + self.guid_to_path.len() + } + + /// Check if the map is empty + pub fn is_empty(&self) -> bool { + self.guid_to_path.is_empty() + } + + /// Iterate over all GUID -> path mappings + pub fn iter(&self) -> impl Iterator { + self.guid_to_path.iter() + } + + /// Get all GUIDs + pub fn guids(&self) -> impl Iterator { + self.guid_to_path.keys() + } + + /// Get all paths + pub fn paths(&self) -> impl Iterator { + self.guid_to_path.values() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_parse_meta_file_basic() { + let meta_content = r#" +fileFormatVersion: 2 +guid: 1234567890abcdef1234567890abcdef +timeCreated: 1234567890 +"#; + + let meta = MetaFile::parse_str(meta_content).unwrap(); + assert_eq!(meta.guid.as_str(), "1234567890abcdef1234567890abcdef"); + assert_eq!(meta.time_created, Some(1234567890)); + assert_eq!(meta.file_format_version, Some(2)); + } + + #[test] + fn test_parse_meta_file_minimal() { + let meta_content = r#" +guid: abc123def45678901234567890123456 +"#; + + let meta = MetaFile::parse_str(meta_content).unwrap(); + assert_eq!(meta.guid.as_str(), "abc123def45678901234567890123456"); + assert_eq!(meta.time_created, None); + assert_eq!(meta.file_format_version, None); + } + + #[test] + fn test_meta_file_guid_conversion() { + let meta_content = r#" +guid: 12345678901234567890123456789012 +"#; + + let meta = MetaFile::parse_str(meta_content).unwrap(); + assert_eq!(meta.guid.as_str(), "12345678901234567890123456789012"); + } + + #[test] + fn test_guid_deserialization() { + // Test direct GUID deserialization from YAML + use crate::types::GUID; + let yaml = "12345678901234567890123456789012"; + let guid: GUID = 
serde_yaml::from_str(yaml).unwrap(); + println!("Deserialized GUID: {}", guid.as_str()); + assert_eq!(guid.as_str(), "12345678901234567890123456789012"); + } + + #[test] + fn test_parse_invalid_meta_file() { + let invalid_content = "not valid yaml: [[["; + let result = MetaFile::parse_str(invalid_content); + assert!(result.is_err()); + } + + #[test] + fn test_guid_map_new() { + let map = GUIDMap::new(); + assert_eq!(map.len(), 0); + assert!(map.is_empty()); + } + + #[test] + fn test_guid_map_insert_and_lookup() { + let mut map = GUIDMap::new(); + let guid = GUID::new("12345678901234567890123456789012"); + let path = PathBuf::from("Assets/Test.prefab"); + + map.insert(guid.clone(), path.clone()); + + assert_eq!(map.get_path(&guid), Some(&path)); + assert_eq!(map.get_guid(&path), Some(&guid)); + assert!(map.contains_guid(&guid)); + assert_eq!(map.len(), 1); + assert!(!map.is_empty()); + } + + #[test] + fn test_guid_map_multiple_entries() { + let mut map = GUIDMap::new(); + + let guid1 = GUID::new("12345678901234567890123456789012"); + let path1 = PathBuf::from("Assets/File1.unity"); + + let guid2 = GUID::new("abcdefabcdefabcdefabcdefabcdefab"); + let path2 = PathBuf::from("Assets/File2.prefab"); + + map.insert(guid1.clone(), path1.clone()); + map.insert(guid2.clone(), path2.clone()); + + assert_eq!(map.len(), 2); + assert_eq!(map.get_path(&guid1), Some(&path1)); + assert_eq!(map.get_path(&guid2), Some(&path2)); + } + + #[test] + fn test_guid_map_overwrite() { + let mut map = GUIDMap::new(); + let guid = GUID::new("12345678901234567890123456789012"); + let path1 = PathBuf::from("Assets/Old.prefab"); + let path2 = PathBuf::from("Assets/New.prefab"); + + map.insert(guid.clone(), path1.clone()); + assert_eq!(map.len(), 1); + + map.insert(guid.clone(), path2.clone()); + assert_eq!(map.len(), 1); // Still 1 entry + assert_eq!(map.get_path(&guid), Some(&path2)); // Updated to new path + } + + #[test] + fn test_guid_map_iterators() { + let mut map = GUIDMap::new(); + 
map.insert( + GUID::new("12345678901234567890123456789012"), + PathBuf::from("path1"), + ); + map.insert( + GUID::new("abcdefabcdefabcdefabcdefabcdefab"), + PathBuf::from("path2"), + ); + + let guids: Vec<_> = map.guids().collect(); + assert_eq!(guids.len(), 2); + + let paths: Vec<_> = map.paths().collect(); + assert_eq!(paths.len(), 2); + + let pairs: Vec<_> = map.iter().collect(); + assert_eq!(pairs.len(), 2); + } + + #[test] + fn test_guid_map_lookup_missing() { + let map = GUIDMap::new(); + let guid = GUID::new("ffffffffffffffffffffffffffffffff"); + let path = PathBuf::from("nonexistent.prefab"); + + assert_eq!(map.get_path(&guid), None); + assert_eq!(map.get_guid(&path), None); + assert!(!map.contains_guid(&guid)); + } +} diff --git a/unity-parser/src/types.rs b/unity-parser/src/types.rs new file mode 100644 index 0000000..5e9233f --- /dev/null +++ b/unity-parser/src/types.rs @@ -0,0 +1,445 @@ +//! Core types used throughout the Unity parser + +use serde::{Deserialize, Serialize}; +use std::fmt; + +/// Unity's local file identifier +/// +/// Each object within a Unity file has a unique fileID that identifies it +/// within that specific file. FileIDs are unique per-file but not globally. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)] +pub struct FileID(pub i64); + +impl FileID { + /// Create a new FileID + pub fn new(id: i64) -> Self { + FileID(id) + } + + /// Get the inner value + pub fn value(&self) -> i64 { + self.0 + } +} + +impl fmt::Display for FileID { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.0) + } +} + +/// Unity's globally unique identifier +/// +/// Each asset in a Unity project has a GUID stored in its .meta file. +/// GUIDs are globally unique and used to reference assets across files. +/// Stored as u128 for efficient memory usage and fast comparisons. 
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize)] +pub struct GUID(pub u128); + +impl GUID { + /// Create a new GUID from a hex string + /// + /// # Panics + /// + /// Panics if the string is not a valid 32-character hexadecimal string. + pub fn new(guid: impl Into) -> Self { + let guid_str = guid.into(); + Self::from_hex_str(&guid_str) + } + + /// Create a GUID from a hex string, returning an error if invalid + pub fn try_new(guid: impl Into) -> Result { + let guid_str = guid.into(); + Self::try_from_hex_str(&guid_str) + } + + /// Parse a 32-character hexadecimal string into a GUID + /// + /// # Panics + /// + /// Panics if the string is not exactly 32 characters or contains invalid hex digits. + pub fn from_hex_str(s: &str) -> Self { + Self::try_from_hex_str(s).expect("Invalid GUID format") + } + + /// Try to parse a 32-character hexadecimal string into a GUID + pub fn try_from_hex_str(s: &str) -> Result { + if s.len() != 32 { + return Err(format!( + "GUID must be exactly 32 characters, got {}", + s.len() + )); + } + + u128::from_str_radix(s, 16) + .map(GUID) + .map_err(|_| "GUID contains invalid hexadecimal characters".to_string()) + } + + /// Get the GUID as a string slice + pub fn as_str(&self) -> String { + format!("{:032x}", self.0) + } + + /// Get the raw u128 value + pub fn value(&self) -> u128 { + self.0 + } +} + +impl fmt::Display for GUID { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{:032x}", self.0) + } +} + +impl From for GUID { + fn from(value: u128) -> Self { + GUID(value) + } +} + +impl From for GUID { + fn from(s: String) -> Self { + GUID::new(s) + } +} + +impl From<&str> for GUID { + fn from(s: &str) -> Self { + GUID::new(s) + } +} + +// Custom deserialization for GUID to handle both string and u128 input +impl<'de> serde::Deserialize<'de> for GUID { + fn deserialize(deserializer: D) -> std::result::Result + where + D: serde::Deserializer<'de>, + { + use serde::de::{self, Visitor}; + use std::fmt; + + 
struct GuidVisitor; + + impl<'de> Visitor<'de> for GuidVisitor { + type Value = GUID; + + fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + formatter.write_str("a GUID as a hex string") + } + + fn visit_str(self, value: &str) -> std::result::Result + where + E: de::Error, + { + GUID::try_from_hex_str(value) + .map_err(|e| de::Error::custom(format!("Invalid GUID: {}", e))) + } + + fn visit_string(self, value: String) -> std::result::Result + where + E: de::Error, + { + self.visit_str(&value) + } + } + + deserializer.deserialize_str(GuidVisitor) + } +} + +/// Represents a Unity reference to another object +/// +/// Unity uses references in the form `{fileID: X}` for local references +/// or `{fileID: X, guid: Y}` for external references. +#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)] +pub struct Reference { + /// The fileID within the referenced file + #[serde(rename = "fileID")] + pub file_id: i64, + + /// Optional GUID for external references (references to other files) + #[serde(skip_serializing_if = "Option::is_none")] + pub guid: Option, + + /// Optional type hint (used in some Unity versions) + #[serde(rename = "type", skip_serializing_if = "Option::is_none")] + pub type_hint: Option, +} + +impl Reference { + /// Create a local reference (within the same file) + pub fn local(file_id: i64) -> Self { + Reference { + file_id, + guid: None, + type_hint: None, + } + } + + /// Create an external reference (to another file) + pub fn external(file_id: i64, guid: impl Into) -> Self { + Reference { + file_id, + guid: Some(guid.into()), + type_hint: None, + } + } + + /// Check if this is a local reference + pub fn is_local(&self) -> bool { + self.guid.is_none() + } + + /// Check if this is an external reference + pub fn is_external(&self) -> bool { + self.guid.is_some() + } + + /// Check if this is a null reference (fileID = 0) + pub fn is_null(&self) -> bool { + self.file_id == 0 + } +} + +/// Represents a property path used in 
prefab overrides +/// +/// Unity uses property paths like "m_LocalPosition.x" or "m_Children.Array.data[0]" +/// to specify which property is being overridden in a prefab instance. +#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)] +pub struct PropertyPath(pub String); + +impl PropertyPath { + /// Create a new property path + pub fn new(path: impl Into) -> Self { + PropertyPath(path.into()) + } + + /// Get the path as a string slice + pub fn as_str(&self) -> &str { + &self.0 + } + + /// Parse the path into segments + /// + /// For example: "m_LocalPosition.x" -> ["m_LocalPosition", "x"] + /// "m_Children.Array.data[0]" -> ["m_Children", "Array", "data", "[0]"] + pub fn segments(&self) -> Vec<&str> { + // Split by dots and preserve array indices + let mut segments = Vec::new(); + let mut current = String::new(); + + for ch in self.0.chars() { + match ch { + '.' => { + if !current.is_empty() { + segments.push(current.clone()); + current.clear(); + } + } + '[' => { + if !current.is_empty() { + segments.push(current.clone()); + current.clear(); + } + current.push('['); + } + _ => current.push(ch), + } + } + + if !current.is_empty() { + segments.push(current); + } + + // Convert to &str references (this is a simplified version) + // In real implementation, we'd return owned strings or use a more sophisticated approach + self.0.split('.').collect() + } +} + +impl fmt::Display for PropertyPath { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.0) + } +} + +impl From for PropertyPath { + fn from(s: String) -> Self { + PropertyPath(s) + } +} + +impl From<&str> for PropertyPath { + fn from(s: &str) -> Self { + PropertyPath(s.to_string()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_file_id_creation() { + let file_id = FileID::new(12345); + assert_eq!(file_id.value(), 12345); + assert_eq!(format!("{}", file_id), "12345"); + } + + #[test] + fn test_file_id_equality() { + let file_id1 = 
FileID::new(100); + let file_id2 = FileID::new(100); + let file_id3 = FileID::new(200); + + assert_eq!(file_id1, file_id2); + assert_ne!(file_id1, file_id3); + } + + #[test] + fn test_guid_creation() { + let guid = GUID::new("abc123def45678901234567890123456"); + assert_eq!(guid.as_str(), "abc123def45678901234567890123456"); + assert_eq!(format!("{}", guid), "abc123def45678901234567890123456"); + } + + #[test] + fn test_guid_from_string() { + let guid: GUID = "12345678901234567890123456789012".into(); + assert_eq!(guid.as_str(), "12345678901234567890123456789012"); + + let guid2: GUID = String::from("abcdefabcdefabcdefabcdefabcdefab").into(); + assert_eq!(guid2.as_str(), "abcdefabcdefabcdefabcdefabcdefab"); + } + + #[test] + fn test_guid_from_u128() { + let value: u128 = 0x12345678901234567890123456789012; + let guid = GUID::from(value); + assert_eq!(guid.value(), value); + assert_eq!(guid.as_str(), "12345678901234567890123456789012"); + } + + #[test] + fn test_guid_equality() { + let guid1 = GUID::new("12345678901234567890123456789012"); + let guid2 = GUID::new("12345678901234567890123456789012"); + let guid3 = GUID::new("abcdefabcdefabcdefabcdefabcdefab"); + + assert_eq!(guid1, guid2); + assert_ne!(guid1, guid3); + } + + #[test] + fn test_guid_parsing() { + // Valid 32-character hex string + let guid = GUID::from_hex_str("12345678901234567890123456789012"); + assert_eq!(guid.value(), 0x12345678901234567890123456789012); + + // Test with uppercase + let guid_upper = GUID::from_hex_str("ABCDEFABCDEFABCDEFABCDEFABCDEFAB"); + assert_eq!(guid_upper.value(), 0xABCDEFABCDEFABCDEFABCDEFABCDEFAB); + } + + #[test] + fn test_guid_try_new() { + // Valid GUID + let guid = GUID::try_new("12345678901234567890123456789012").unwrap(); + assert_eq!(guid.value(), 0x12345678901234567890123456789012); + + // Invalid length + assert!(GUID::try_new("1234567890123456789012345678901").is_err()); // 31 chars + assert!(GUID::try_new("123456789012345678901234567890123").is_err()); // 33 
chars + + // Invalid characters + assert!(GUID::try_new("gggggggggggggggggggggggggggggggg").is_err()); + } + + #[test] + #[should_panic(expected = "Invalid GUID format")] + fn test_guid_invalid_length_panics() { + GUID::new("1234567890123456789012345678901"); // 31 chars + } + + #[test] + #[should_panic(expected = "Invalid GUID format")] + fn test_guid_invalid_chars_panics() { + GUID::new("gggggggggggggggggggggggggggggggg"); + } + + #[test] + fn test_reference_is_local() { + let local_ref = Reference::local(12345); + assert!(local_ref.is_local()); + assert!(!local_ref.is_external()); + assert_eq!(local_ref.file_id, 12345); + assert!(local_ref.guid.is_none()); + } + + #[test] + fn test_reference_is_external() { + let external_ref = Reference::external(12345, "abc123def456"); + assert!(!external_ref.is_local()); + assert!(external_ref.is_external()); + assert_eq!(external_ref.file_id, 12345); + assert_eq!(external_ref.guid.as_deref(), Some("abc123def456")); + } + + #[test] + fn test_reference_is_null() { + let null_ref = Reference::local(0); + assert!(null_ref.is_null()); + + let non_null_ref = Reference::local(123); + assert!(!non_null_ref.is_null()); + } + + #[test] + fn test_property_path_creation() { + let path = PropertyPath::new("m_LocalPosition.x"); + assert_eq!(path.as_str(), "m_LocalPosition.x"); + assert_eq!(format!("{}", path), "m_LocalPosition.x"); + } + + #[test] + fn test_property_path_from_string() { + let path: PropertyPath = "m_Children.Array.data[0]".into(); + assert_eq!(path.as_str(), "m_Children.Array.data[0]"); + } + + #[test] + fn test_property_path_segments() { + let path = PropertyPath::new("m_LocalPosition.x"); + let segments = path.segments(); + assert_eq!(segments, vec!["m_LocalPosition", "x"]); + } + + #[test] + fn test_reference_serialization() { + let local_ref = Reference::local(12345); + let json = serde_json::to_string(&local_ref).unwrap(); + assert!(json.contains("12345")); + assert!(!json.contains("guid")); + + let external_ref = 
Reference::external(67890, "abc123"); + let json = serde_json::to_string(&external_ref).unwrap(); + assert!(json.contains("67890")); + assert!(json.contains("abc123")); + } + + #[test] + fn test_reference_deserialization() { + let json = r#"{"fileID":12345}"#; + let reference: Reference = serde_json::from_str(json).unwrap(); + assert_eq!(reference.file_id, 12345); + assert!(reference.is_local()); + + let json = r#"{"fileID":67890,"guid":"abc123"}"#; + let reference: Reference = serde_json::from_str(json).unwrap(); + assert_eq!(reference.file_id, 67890); + assert_eq!(reference.guid.as_deref(), Some("abc123")); + assert!(reference.is_external()); + } +} diff --git a/unity-parser/src/world.rs b/unity-parser/src/world.rs new file mode 100644 index 0000000..fe321f5 --- /dev/null +++ b/unity-parser/src/world.rs @@ -0,0 +1,258 @@ +//! ECS world management for loaded Unity scenes +//! +//! This module provides utilities for building and managing Sparsey ECS worlds +//! populated with Unity scene data. + +use crate::error::Result; +use crate::types::FileID; +use sparsey::storage::Entity; +use sparsey::world::World; +use std::collections::HashMap; + +/// Builder for constructing an ECS world from Unity scene data +/// +/// This builder manages the process of creating entities and attaching components +/// as Unity data is parsed. It maintains the mapping between Unity FileIDs and +/// Sparsey Entity IDs. 
+#[derive(Debug)] +pub struct WorldBuilder { + /// The underlying Sparsey world + world: World, + + /// Maps Unity FileIDs to Sparsey Entities for the current parsing scope + file_id_map: HashMap, +} + +impl WorldBuilder { + /// Create a new world builder with an empty world + pub fn new() -> Self { + Self { + world: World::default(), + file_id_map: HashMap::new(), + } + } + + /// Get a reference to the underlying world + pub fn world(&self) -> &World { + &self.world + } + + /// Get a mutable reference to the underlying world + pub fn world_mut(&mut self) -> &mut World { + &mut self.world + } + + /// Create a new entity and associate it with a Unity FileID + /// + /// Returns the created entity. + /// + /// Note: Creates an entity with no components initially. + /// Components should be added using `world_mut().insert()`. + pub fn create_entity(&mut self, file_id: FileID) -> Entity { + // Sparsey requires components to create an entity, so we create with an empty tuple + let entity = self.world.create(()); + self.file_id_map.insert(file_id, entity); + entity + } + + /// Get an entity by its FileID + pub fn get_entity(&self, file_id: FileID) -> Option { + self.file_id_map.get(&file_id).copied() + } + + /// Check if a FileID has been registered + pub fn has_entity(&self, file_id: FileID) -> bool { + self.file_id_map.contains_key(&file_id) + } + + /// Get the number of entities created + pub fn entity_count(&self) -> usize { + self.file_id_map.len() + } + + /// Clear the FileID mappings (used when starting a new file) + pub fn clear_mappings(&mut self) { + self.file_id_map.clear(); + } + + /// Consume the builder and return the completed world + pub fn build(self) -> World { + self.world + } + + /// Get statistics about the world builder + pub fn stats(&self) -> WorldBuilderStats { + WorldBuilderStats { + entity_count: self.file_id_map.len(), + // Note: Sparsey World doesn't have a len() method, so we use the map count + world_entity_count: self.file_id_map.len(), + } 
+ } +} + +impl Default for WorldBuilder { + fn default() -> Self { + Self::new() + } +} + +/// Statistics about the world builder +#[derive(Debug, Clone, Copy)] +pub struct WorldBuilderStats { + /// Number of FileID mappings registered + pub entity_count: usize, + + /// Total number of entities in the world + pub world_entity_count: usize, +} + +impl std::fmt::Display for WorldBuilderStats { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!( + f, + "WorldBuilder Stats: {} mapped entities, {} total entities", + self.entity_count, self.world_entity_count + ) + } +} + +/// Helper function to load a Unity scene into an ECS world +/// +/// This is a placeholder for the full implementation that will come in Phase 4. +/// For now, it just creates an empty world. +pub fn load_scene(_scene_path: &std::path::Path) -> Result { + // TODO: Implement full scene loading in Phase 4 + Ok(World::default()) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_world_builder_new() { + let builder = WorldBuilder::new(); + assert_eq!(builder.entity_count(), 0); + } + + #[test] + fn test_world_builder_default() { + let builder = WorldBuilder::default(); + assert_eq!(builder.entity_count(), 0); + } + + #[test] + fn test_world_builder_create_entity() { + let mut builder = WorldBuilder::new(); + let file_id = FileID::new(12345); + + let entity = builder.create_entity(file_id); + + assert!(builder.has_entity(file_id)); + assert_eq!(builder.get_entity(file_id), Some(entity)); + assert_eq!(builder.entity_count(), 1); + } + + #[test] + fn test_world_builder_multiple_entities() { + let mut builder = WorldBuilder::new(); + + let file_id1 = FileID::new(100); + let file_id2 = FileID::new(200); + let file_id3 = FileID::new(300); + + let entity1 = builder.create_entity(file_id1); + let entity2 = builder.create_entity(file_id2); + let entity3 = builder.create_entity(file_id3); + + assert_eq!(builder.entity_count(), 3); + 
assert_eq!(builder.get_entity(file_id1), Some(entity1)); + assert_eq!(builder.get_entity(file_id2), Some(entity2)); + assert_eq!(builder.get_entity(file_id3), Some(entity3)); + } + + #[test] + fn test_world_builder_get_nonexistent_entity() { + let builder = WorldBuilder::new(); + let file_id = FileID::new(999); + + assert_eq!(builder.get_entity(file_id), None); + assert!(!builder.has_entity(file_id)); + } + + #[test] + fn test_world_builder_clear_mappings() { + let mut builder = WorldBuilder::new(); + builder.create_entity(FileID::new(123)); + builder.create_entity(FileID::new(456)); + + assert_eq!(builder.entity_count(), 2); + + builder.clear_mappings(); + + assert_eq!(builder.entity_count(), 0); + assert!(!builder.has_entity(FileID::new(123))); + assert!(!builder.has_entity(FileID::new(456))); + } + + #[test] + fn test_world_builder_world_access() { + let mut builder = WorldBuilder::new(); + + // Test immutable access + let _world_ref = builder.world(); + + // Test mutable access + let _world_mut = builder.world_mut(); + + // Should compile fine + assert_eq!(builder.entity_count(), 0); + } + + #[test] + fn test_world_builder_build() { + let mut builder = WorldBuilder::new(); + builder.create_entity(FileID::new(123)); + + let _world = builder.build(); + // World is consumed, builder is no longer accessible + } + + #[test] + fn test_world_builder_stats() { + let mut builder = WorldBuilder::new(); + builder.create_entity(FileID::new(100)); + builder.create_entity(FileID::new(200)); + + let stats = builder.stats(); + assert_eq!(stats.entity_count, 2); + assert_eq!(stats.world_entity_count, 2); + + // Test Display implementation + let stats_str = format!("{}", stats); + assert!(stats_str.contains("2")); + } + + #[test] + fn test_world_builder_overwrite_entity() { + let mut builder = WorldBuilder::new(); + let file_id = FileID::new(123); + + let entity1 = builder.create_entity(file_id); + let entity2 = builder.create_entity(file_id); + + // Should overwrite the mapping 
+ assert_eq!(builder.entity_count(), 1); + assert_eq!(builder.get_entity(file_id), Some(entity2)); + assert_ne!(entity1, entity2); // Different entities + } + + #[test] + fn test_load_scene_placeholder() { + use std::path::Path; + + // This is a placeholder function for now + let result = load_scene(Path::new("test.unity")); + assert!(result.is_ok()); + } +}