From 607a6468bb7d8bcac7172b7ad591cbf65563e574 Mon Sep 17 00:00:00 2001 From: Connor Date: Tue, 30 Dec 2025 18:48:18 +0900 Subject: [PATCH] docs --- .claude/settings.local.json | 7 + DESIGN.md | 192 ++++++++++++++++++ ROADMAP.md | 394 ++++++++++++++++++++++++++++++++++++ 3 files changed, 593 insertions(+) create mode 100644 .claude/settings.local.json create mode 100644 DESIGN.md create mode 100644 ROADMAP.md diff --git a/.claude/settings.local.json b/.claude/settings.local.json new file mode 100644 index 0000000..c5ce083 --- /dev/null +++ b/.claude/settings.local.json @@ -0,0 +1,7 @@ +{ + "permissions": { + "allow": [ + "Bash(cat:*)" + ] + } +} diff --git a/DESIGN.md b/DESIGN.md new file mode 100644 index 0000000..100042a --- /dev/null +++ b/DESIGN.md @@ -0,0 +1,192 @@ +# Cursebreaker Unity Parser - Design Document + +## Project Overview + +A high-performance Rust library for parsing and querying Unity project files (.unity scenes, .prefab prefabs, and .asset ScriptableObjects). + +## Goals + +1. **Parse Unity YAML Format**: Handle Unity's YAML 1.1 format with custom tags (`!u!`) and file ID references +2. **Extract Structure**: Parse GameObjects, Components, and their properties into queryable data structures +3. **High Performance**: Optimized for large Unity projects with minimal memory footprint +4. **Type Safety**: Strong typing for Unity's component system +5. **Library-First**: Designed as a reusable SDK for other Rust tools + +## Target File Formats + +- `.unity` - Unity scene files +- `.prefab` - Unity prefab files +- `.asset` - Unity ScriptableObject and other asset files + +All three formats share the same underlying YAML structure with Unity-specific extensions. + +## Unity File Format Structure + +Unity files use YAML 1.1 with special conventions: + +```yaml +%YAML 1.1 +%TAG !u! 
tag:unity3d.com,2011: +--- !u!1 &1866116814460599870 +GameObject: + m_ObjectHideFlags: 0 + m_Component: + - component: {fileID: 8151827567463220614} + - component: {fileID: 8755205353704683373} + m_Name: CardGrabber +--- !u!224 &8151827567463220614 +RectTransform: + m_GameObject: {fileID: 1866116814460599870} + m_LocalPosition: {x: 0, y: 0, z: 0} +``` + +### Key Concepts + +1. **Documents**: Each `---` starts a new YAML document representing a Unity object +2. **Type Tags**: `!u!N` indicates Unity type (e.g., `!u!1` = GameObject, `!u!224` = RectTransform) +3. **Anchors**: `&ID` defines a local file ID for the object +4. **File References**: `{fileID: N}` references objects by their ID (local or external) +5. **GUID References**: `{guid: ...}` references external assets +6. **Properties**: All Unity objects have serialized fields (usually prefixed with `m_`) + +## Architecture + +### Core Components + +``` +cursebreaker-parser/ +├── src/ +│ ├── lib.rs # Public API exports +│ ├── parser/ # YAML parsing layer +│ │ ├── mod.rs +│ │ ├── yaml.rs # YAML document parser +│ │ ├── unity_tag.rs # Unity type tag handler (!u!) 
+│ │ └── reference.rs # FileID/GUID reference parser +│ ├── model/ # Data model +│ │ ├── mod.rs +│ │ ├── document.rs # UnityDocument struct +│ │ ├── object.rs # UnityObject base +│ │ ├── gameobject.rs # GameObject type +│ │ ├── component.rs # Component types +│ │ └── property.rs # Property value types +│ ├── types/ # Unity type system +│ │ ├── mod.rs +│ │ ├── type_id.rs # Unity type ID -> name mapping +│ │ └── component_types.rs +│ ├── query/ # Query API +│ │ ├── mod.rs +│ │ ├── project.rs # UnityProject (multi-file) +│ │ ├── find.rs # Find objects/components +│ │ └── filter.rs # Filter/search utilities +│ └── error.rs # Error types +``` + +### Data Model + +```rust +// Core types +pub struct UnityFile { + pub path: PathBuf, + pub documents: Vec<UnityDocument>, +} + +pub struct UnityDocument { + pub type_id: u32, // From !u!N + pub file_id: i64, // From &ID + pub class_name: String, // E.g., "GameObject" + pub properties: PropertyMap, +} + +pub struct UnityProject { + pub files: HashMap<PathBuf, UnityFile>, + // Reference resolution cache +} + +// Property values (simplified) +pub enum PropertyValue { + Integer(i64), + Float(f64), + String(String), + Boolean(bool), + FileRef { file_id: i64, guid: Option<String> }, + Vector3 { x: f64, y: f64, z: f64 }, + Color { r: f64, g: f64, b: f64, a: f64 }, + Array(Vec<PropertyValue>), + Object(PropertyMap), +} +``` + +## Performance Considerations + +1. **Streaming Parser**: Parse YAML incrementally rather than loading entire file into memory +2. **Lazy Loading**: Only parse files when accessed +3. **Reference Caching**: Cache resolved references to avoid repeated lookups +4. **Zero-Copy Where Possible**: Use string slices and borrowed data where feasible +5. 
**Parallel Parsing**: Support parsing multiple files concurrently + +## Dependencies + +- `yaml-rust2` or `serde_yaml` - YAML parsing (evaluate both) +- `serde` - Serialization/deserialization +- `rayon` - Parallel processing (optional, for multi-file parsing) +- `thiserror` - Error handling +- `indexmap` - Ordered maps for properties + +## Testing Strategy + +1. **Unit Tests**: Each parser component tested independently +2. **Integration Tests**: Full file parsing with real Unity files +3. **Sample Data**: Use PiratePanic project as test corpus +4. **Benchmarks**: Performance tests on large Unity projects +5. **Fuzzing**: Fuzz testing for parser robustness (future) + +## API Design Goals + +### Simple File Parsing +```rust +let file = UnityFile::from_path("Scene.unity")?; +for doc in &file.documents { + println!("{}: {}", doc.class_name, doc.file_id); +} +``` + +### Query API +```rust +let project = UnityProject::from_directory("Assets/")?; + +// Find all GameObjects +let objects = project.find_all_by_type("GameObject"); + +// Find by name +let player = project.find_by_name("Player")?; + +// Get components +let transform = player.get_component("Transform")?; +let position = transform.get_vector3("m_LocalPosition")?; +``` + +### Reference Resolution +```rust +// Follow references automatically +let gameobject = project.get_object(file_id)?; +let transform_ref = gameobject.get_file_ref("m_Component[0].component")?; +let transform = project.resolve_reference(transform_ref)?; +``` + +## Future Enhancements (Out of Scope for v1) + +- Unity YAML serialization (writing files) +- C# script parsing +- Asset dependency graphs +- Unity version detection and compatibility +- Binary .unity format support (older Unity versions) +- Meta file parsing (.meta files) + +## Success Criteria + +1. Successfully parse all files in PiratePanic sample project +2. Extract all GameObjects and Components with properties +3. Resolve all internal file references correctly +4. 
Parse large scene files (>10MB) in <100ms +5. Memory usage scales linearly with file size +6. Clean, documented public API diff --git a/ROADMAP.md b/ROADMAP.md new file mode 100644 index 0000000..db4c87e --- /dev/null +++ b/ROADMAP.md @@ -0,0 +1,394 @@ +# Cursebreaker Unity Parser - Implementation Roadmap + +## Overview + +This roadmap breaks development down into 5 phases leading up to v1.0 (Phase 6 collects post-v1.0 ideas), each building on the previous. Each phase has clear deliverables and success criteria. + +--- + +## Phase 1: Project Foundation & YAML Parsing + +**Goal**: Set up project structure and implement basic YAML parsing for Unity files + +### Tasks + +1. **Project Setup** + - [ ] Initialize Cargo project with workspace structure + - [ ] Add core dependencies (yaml parser, serde, thiserror) + - [ ] Set up basic module structure (lib.rs, parser/, model/, error.rs) + - [ ] Configure Cargo.toml with metadata and feature flags + +2. **Error Handling** + - [ ] Define error types (ParseError, ReferenceError, etc.) + - [ ] Implement Display and Error traits + - [ ] Set up Result type aliases + +3. **YAML Document Parser** + - [ ] Implement Unity YAML document reader + - [ ] Parse YAML 1.1 header and Unity tags + - [ ] Split multi-document YAML files into individual documents + - [ ] Handle `%TAG !u! tag:unity3d.com,2011:` directive + +4. **Unity Tag Parser** + - [ ] Parse Unity type tags (`!u!1`, `!u!224`, etc.) + - [ ] Extract type ID from tag + - [ ] Handle anchor IDs (`&12345`) + +5. 
**Basic Testing** + - [ ] Set up test infrastructure + - [ ] Create minimal test YAML files + - [ ] Unit tests for YAML splitting and tag parsing + - [ ] Integration test: parse simple Unity file + +### Deliverables +- [ ] ✓ Working Cargo project structure +- [ ] ✓ YAML documents successfully split from Unity files +- [ ] ✓ Unity type IDs and file IDs extracted +- [ ] ✓ Basic error handling in place +- [ ] ✓ Tests passing + +### Success Criteria +- [ ] Can read `Scene01MainMenu.unity` and split into individual documents +- [ ] Each document has correct type ID and file ID +- [ ] No panics on malformed input (returns errors) + +--- + +## Phase 2: Data Model & Property Parsing + +**Goal**: Build the core data model and parse Unity properties into structured data + +### Tasks + +1. **Core Data Structures** + - [ ] Implement `UnityDocument` struct + - [ ] Implement `UnityFile` struct + - [ ] Create property storage (PropertyMap using IndexMap) + - [ ] Define FileID and LocalID types + +2. **Property Value Types** + - [ ] Implement `PropertyValue` enum (Integer, Float, String, Boolean, etc.) + - [ ] Add Vector3, Color, Quaternion value types + - [ ] Add Array and nested Object support + - [ ] Implement Debug and Display for PropertyValue + +3. **Property Parser** + - [ ] Parse YAML mappings into PropertyMap + - [ ] Handle nested properties (paths like `m_Component[0].component`) + - [ ] Parse Unity-specific formats: + - [ ] `{fileID: N}` references + - [ ] `{x: 0, y: 0, z: 0}` vectors + - [ ] `{r: 1, g: 1, b: 1, a: 1}` colors + - [ ] `{guid: ..., type: N}` external references + +4. **GameObject & Component Models** + - [ ] Create specialized GameObject struct + - [ ] Create base Component trait/struct + - [ ] Add common component types (Transform, RectTransform, etc.) + - [ ] Helper methods for accessing common properties + +5. 
**Testing** + - [ ] Unit tests for property parsing + - [ ] Test all PropertyValue variants + - [ ] Integration test: parse GameObject with components + - [ ] Snapshot tests using sample Unity files + +### Deliverables +- [ ] ✓ Complete data model implemented +- [ ] ✓ Properties parsed into type-safe structures +- [ ] ✓ GameObject and Component abstractions working +- [ ] ✓ All property types handled correctly + +### Success Criteria +- [ ] Parse entire `CardGrabber.prefab` correctly +- [ ] Extract all GameObject properties (name, components list) +- [ ] Extract all Component properties with correct types +- [ ] Can access nested properties programmatically + +--- + +## Phase 3: Reference Resolution & Unity Type System + +**Goal**: Resolve references between objects and implement Unity's type system + +### Tasks + +1. **Reference Types** + - [ ] Implement `FileReference` struct (fileID + optional GUID) + - [ ] Implement `LocalReference` (within-file references) + - [ ] Implement `ExternalReference` (cross-file GUID references) + - [ ] Add reference equality and comparison + +2. **Type ID Mapping** + - [ ] Create Unity type ID → class name mapping + - [ ] Common types: GameObject(1), Transform(4), MonoBehaviour(114), etc. + - [ ] Load type mappings from data file or hardcode common ones + - [ ] Support unknown type IDs gracefully + +3. **Reference Resolution** + - [ ] Implement within-file reference resolution + - [ ] Cache resolved references for performance + - [ ] Handle cyclic references safely + - [ ] Detect and report broken references + +4. **UnityProject Multi-File Support** + - [ ] Implement `UnityProject` struct + - [ ] Load multiple Unity files into project + - [ ] Build file ID → document index + - [ ] Cross-file reference resolution (GUID-based) + +5. **Query Helpers** + - [ ] Find object by file ID + - [ ] Find objects by type + - [ ] Find objects by name + - [ ] Get component from GameObject + - [ ] Follow reference chains + +6. 
**Testing** + - [ ] Test reference resolution within single file + - [ ] Test cross-file references (scene → prefab) + - [ ] Test broken reference handling + - [ ] Test circular reference detection + +### Deliverables +- [ ] ✓ All references within files resolved correctly +- [ ] ✓ Type ID system working with common Unity types +- [ ] ✓ UnityProject can load and query multiple files +- [ ] ✓ Query API functional + +### Success Criteria +- [ ] Load entire PiratePanic/Scenes/ directory +- [ ] Resolve all GameObject → Component references +- [ ] Resolve prefab references from scenes +- [ ] Find objects by name across entire project +- [ ] Handle missing references gracefully + +--- + +## Phase 4: Optimization & Robustness + +**Goal**: Optimize performance and handle edge cases + +### Tasks + +1. **Performance Optimization** + - [ ] Profile parsing performance on large files + - [ ] Implement string interning for common property names + - [ ] Optimize property access paths (cache lookups) + - [ ] Consider zero-copy parsing where possible + - [ ] Add lazy loading for large projects + +2. **Memory Optimization** + - [ ] Measure memory usage on large projects + - [ ] Use Cow<str> where appropriate + - [ ] Pool allocations for common types + - [ ] Implement Drop for cleanup + - [ ] Add memory usage benchmarks + +3. **Parallel Processing** + - [ ] Add optional rayon dependency + - [ ] Parallel file loading + - [ ] Parallel document parsing within files + - [ ] Thread-safe caching + +4. **Error Recovery** + - [ ] Graceful degradation on parse errors + - [ ] Partial file parsing (skip invalid documents) + - [ ] Better error messages with context + - [ ] Error recovery suggestions + +5. **Edge Cases** + - [ ] Handle very large files (>100MB scenes) + - [ ] Handle deeply nested properties + - [ ] Handle unusual property types + - [ ] Handle legacy Unity versions (different YAML formats) + - [ ] Handle corrupted files + +6. 
**Comprehensive Testing** + - [ ] Parse entire PiratePanic project + - [ ] Parse various Unity project versions + - [ ] Stress tests with large files + - [ ] Fuzz testing setup (optional) + - [ ] Property-based tests + +### Deliverables +- [ ] ✓ Optimized parsing (<100ms for 10MB file) +- [ ] ✓ Low memory footprint (linear scaling) +- [ ] ✓ Parallel parsing support +- [ ] ✓ Robust error handling +- [ ] ✓ Comprehensive test suite + +### Success Criteria +- [ ] Parse 10MB scene file in <100ms +- [ ] Parse entire PiratePanic project in <1s +- [ ] Memory usage < 2x file size +- [ ] 100% of PiratePanic files parse successfully +- [ ] No panics on malformed input + +--- + +## Phase 5: API Polish & Documentation + +**Goal**: Finalize public API and create excellent documentation + +### Tasks + +1. **API Review & Refinement** + - [ ] Review all public APIs for consistency + - [ ] Add convenience methods based on common use cases + - [ ] Ensure ergonomic API design + - [ ] Add builder patterns where appropriate + - [ ] Minimize unsafe code, document when necessary + +2. **Type Safety Improvements** + - [ ] Add type-safe component access methods + - [ ] Strongly-typed property getters + - [ ] Generic query API improvements + - [ ] Consider proc macros for component definitions (optional) + +3. **Documentation** + - [ ] Write comprehensive rustdoc for all public items + - [ ] Add code examples to every public function + - [ ] Create module-level documentation + - [ ] Write getting started guide + - [ ] Create cookbook with common tasks + +4. **Examples** + - [ ] Basic parsing example + - [ ] Query API example + - [ ] Reference resolution example + - [ ] Multi-file project example + - [ ] Performance tips example + +5. **README & Guides** + - [ ] Professional README.md + - [ ] Architecture documentation + - [ ] Contributing guide + - [ ] Changelog template + - [ ] License file (Apache 2.0 or MIT) + +6. 
**CI/CD Setup** + - [ ] GitHub Actions workflow + - [ ] Run tests on PR + - [ ] Clippy lints + - [ ] Format checking + - [ ] Code coverage reporting + - [ ] Benchmark tracking + +7. **Benchmarks** + - [ ] Benchmark suite for common operations + - [ ] Track performance over time + - [ ] Document performance characteristics + - [ ] Comparison with other parsers (if any exist) + +### Deliverables +- [ ] ✓ Clean, documented public API +- [ ] ✓ Comprehensive rustdoc with examples +- [ ] ✓ README and getting started guide +- [ ] ✓ Working examples +- [ ] ✓ CI/CD pipeline + +### Success Criteria +- [ ] Every public item has rustdoc +- [ ] At least 3 working examples +- [ ] CI passes on all commits +- [ ] README clearly explains usage +- [ ] Someone new can use library from docs alone + +--- + +## Phase 6: Future Enhancements (Post-v1.0) + +These are potential features for future versions: + +### Advanced Querying +- [ ] XPath-like query language for Unity objects +- [ ] Filter DSL for complex searches +- [ ] Object graph traversal API +- [ ] Dependency analysis tools + +### Write Support +- [ ] Modify Unity files programmatically +- [ ] Create new Unity objects +- [ ] Safe YAML serialization +- [ ] Preserve formatting and comments + +### Additional Formats +- [ ] .meta file parsing +- [ ] TextMesh Pro asset files +- [ ] Unity package manifest parsing +- [ ] C# script analysis integration + +### Tooling +- [ ] CLI tool built on library +- [ ] Web service for Unity file analysis +- [ ] VS Code extension for Unity file viewing +- [ ] Unity Editor plugin for exporting metadata + +### Performance +- [ ] Binary format support (legacy Unity) +- [ ] Streaming API for huge files +- [ ] Incremental parsing (watch mode) +- [ ] Serialization/deserialization optimizations + +--- + +## Development Guidelines + +### Code Quality +- [ ] Follow Rust API guidelines +- [ ] Use clippy with strict lints +- [ ] Maintain >80% test coverage +- [ ] No unsafe unless absolutely necessary +- [ ] All 
public APIs must be documented + +### Testing Philosophy +- [ ] Unit test every parser component +- [ ] Integration tests for full workflows +- [ ] Use real Unity files from PiratePanic +- [ ] Add regression tests for bugs +- [ ] Benchmark critical paths + +### Version Strategy +- [ ] Semantic versioning (SemVer) +- [ ] 0.x.x during development +- [ ] 1.0.0 when API is stable +- [ ] Changelog for all versions +- [ ] No breaking changes in minor versions after 1.0 + +### Dependencies +- [ ] Minimize dependency count +- [ ] Use well-maintained crates only +- [ ] Avoid nightly features +- [ ] Keep MSRV (Minimum Supported Rust Version) reasonable +- [ ] Document all feature flags + +--- + +## Estimated Milestones + +These are rough estimates for a single developer working part-time: + +- [ ] **Phase 1**: 1-2 weeks +- [ ] **Phase 2**: 2-3 weeks +- [ ] **Phase 3**: 2-3 weeks +- [ ] **Phase 4**: 1-2 weeks +- [ ] **Phase 5**: 1-2 weeks + +**Total: 7-12 weeks to v1.0** + +Phases can overlap and tasks can be parallelized. Testing happens continuously throughout all phases. + +--- + +## Getting Started + +To begin implementation: + +1. Start with Phase 1, Task 1 (Project Setup) +2. Work through tasks sequentially within each phase +3. Complete all deliverables before moving to next phase +4. Use PiratePanic sample project for testing throughout +5. Iterate based on what you learn from the Unity files + +Remember: Start simple, make it work, then make it fast. Focus on correctness and API design in early phases, optimization comes later.