//! Unity YAML parsing module pub mod guid_resolver; pub mod meta; pub mod prefab_guid_resolver; mod unity_tag; mod yaml; pub use guid_resolver::{find_project_root, GuidResolver}; pub use meta::{get_meta_path, MetaFile}; pub use prefab_guid_resolver::PrefabGuidResolver; pub use unity_tag::{parse_unity_tag, UnityTag}; pub use yaml::split_yaml_documents; use log::{info, warn}; use crate::model::{RawDocument, UnityAsset, UnityFile, UnityPrefab, UnityScene, UnityProject}; use crate::types::{FileID, Guid, TypeFilter}; use crate::{Error, Result}; use regex::Regex; use std::path::Path; /// Parse a Unity file from the given path /// /// Automatically detects file type based on extension: /// - .unity → UnityFile::Scene with ECS World /// - .prefab → UnityFile::Prefab with raw YAML /// - .asset → UnityFile::Asset with raw YAML /// /// By default, parses all files. Use `parse_unity_file_filtered` for regex filtering. /// /// # Example /// /// ```no_run /// use unity_parser::parser::parse_unity_file; /// use unity_parser::UnityFile; /// use std::path::Path; /// /// let file = parse_unity_file(Path::new("Scene.unity"))?; /// match file { /// UnityFile::Scene(scene) => println!("Scene with {} entities", scene.entity_map.len()), /// UnityFile::Prefab(prefab) => println!("Prefab with {} documents", prefab.documents.len()), /// UnityFile::Asset(asset) => println!("Asset with {} documents", asset.documents.len()), /// } /// # Ok::<(), unity_parser::Error>(()) /// ``` pub fn parse_unity_file(path: &Path) -> Result { parse_unity_file_filtered(path, None, None) } /// Parse a Unity file with optional regex filtering and type filtering /// /// Same as `parse_unity_file`, but allows filtering files by path pattern and Unity types. /// If the path doesn't match the regex, returns an error. /// /// # Arguments /// /// * `path` - Path to the Unity file to parse /// * `filter` - Optional regex to match against the file path. If None, parses all files (default behavior). /// * `type_filter` - Optional filter for Unity types and MonoBehaviour GUIDs. If None, parses all types (default behavior). /// /// # Example /// /// ```no_run /// use unity_parser::parser::{parse_unity_file_filtered}; /// use unity_parser::TypeFilter; /// use regex::Regex; /// use std::path::Path; /// use std::collections::HashSet; /// /// // Only parse files with "Test" in the name /// let filter = Regex::new(r"Test").unwrap(); /// let file = parse_unity_file_filtered(Path::new("TestScene.unity"), Some(&filter), None)?; /// /// // Only parse Transform and GameObject types /// let mut types = HashSet::new(); /// types.insert("Transform".to_string()); /// types.insert("GameObject".to_string()); /// let type_filter = TypeFilter::with_unity_types(types); /// let file2 = parse_unity_file_filtered(Path::new("Scene.unity"), None, Some(&type_filter))?; /// # Ok::<(), unity_parser::Error>(()) /// ``` pub fn parse_unity_file_filtered( path: &Path, filter: Option<&Regex>, type_filter: Option<&TypeFilter>, ) -> Result { // Apply filter if provided if let Some(regex) = filter { let path_str = path.to_str().ok_or_else(|| { Error::invalid_format("Path contains invalid UTF-8") })?; if !regex.is_match(path_str) { return Err(Error::invalid_format(format!( "Path '{}' does not match filter pattern", path.display() ))); } } parse_unity_file_impl(path, type_filter) } /// Internal implementation of Unity file parsing fn parse_unity_file_impl(path: &Path, type_filter: Option<&TypeFilter>) -> Result { // Read the file let content = std::fs::read_to_string(path)?; // Validate Unity header validate_unity_header(&content, path)?; // Detect file type by extension let file_type = detect_file_type(path); // Parse based on file type match file_type { FileType::Scene => parse_scene(path, &content, type_filter), FileType::Prefab => parse_prefab(path, &content, type_filter), FileType::Asset => parse_asset(path, &content, type_filter), FileType::Unknown => Err(Error::invalid_format(format!( "Unknown file extension: {}", path.display() ))), } } /// File type enumeration enum FileType { Scene, Prefab, Asset, Unknown, } /// Detect file type based on extension fn detect_file_type(path: &Path) -> FileType { match path.extension().and_then(|s| s.to_str()) { Some("unity") => FileType::Scene, Some("prefab") => FileType::Prefab, Some("asset") => FileType::Asset, _ => FileType::Unknown, } } /// Parse a scene file into an ECS World fn parse_scene(path: &Path, content: &str, type_filter: Option<&TypeFilter>) -> Result { let raw_documents = parse_raw_documents(content, type_filter)?; // Try to find Unity project root and build both GUID resolvers let (guid_resolver, prefab_guid_resolver) = match find_project_root(path) { Ok(project_root) => { info!("📦 Found Unity project root: {}", project_root.display()); // Build script GUID resolver let guid_res = match GuidResolver::from_project(&project_root) { Ok(resolver) => { info!("Script GUID resolver built ({} mappings)", resolver.len()); Some(resolver) } Err(e) => { warn!("Failed to build script GUID resolver: {}", e); None } }; // Build prefab GUID resolver let prefab_res = match PrefabGuidResolver::from_project(&project_root) { Ok(resolver) => { info!("Prefab GUID resolver built ({} mappings)", resolver.len()); Some(resolver) } Err(e) => { warn!("Failed to build prefab GUID resolver: {}", e); None } }; (guid_res, prefab_res) } Err(_) => { // Not part of a Unity project, or project root not found (None, None) } }; // Build ECS world from documents with both resolvers let (world, entity_map) = crate::ecs::build_world_from_documents( raw_documents, guid_resolver.as_ref(), prefab_guid_resolver.as_ref(), )?; Ok(UnityFile::Scene(UnityScene::new( path.to_path_buf(), world, entity_map, ))) } /// Parse a scene file using pre-built GUID resolvers from a UnityProject /// /// This is more efficient than `parse_scene` when parsing multiple scenes /// because the GUID resolvers are already initialized. /// /// # Arguments /// /// * `path` - Path to the scene file /// * `project` - Pre-initialized UnityProject with GUID resolvers pub fn parse_scene_with_project(path: &Path, project: &UnityProject) -> Result { parse_scene_with_project_filtered(path, project, None) } /// Parse a scene file using pre-built GUID resolvers from a UnityProject with optional type filtering /// /// # Arguments /// /// * `path` - Path to the scene file /// * `project` - Pre-initialized UnityProject with GUID resolvers /// * `type_filter` - Optional filter for Unity types and MonoBehaviour class names pub fn parse_scene_with_project_filtered( path: &Path, project: &UnityProject, type_filter: Option<&TypeFilter> ) -> Result { // Read the file let content = std::fs::read_to_string(path)?; // Validate Unity header validate_unity_header(&content, path)?; // Parse raw documents with type filtering let raw_documents = parse_raw_documents(&content, type_filter)?; // Build ECS world from documents using project's resolvers let (world, entity_map) = crate::ecs::build_world_from_documents( raw_documents, Some(&project.guid_resolver), Some(&project.prefab_resolver), )?; Ok(UnityScene::new( path.to_path_buf(), world, entity_map, )) } /// Parse a prefab file using pre-built GUID resolvers from a UnityProject /// /// # Arguments /// /// * `path` - Path to the prefab file /// * `project` - Pre-initialized UnityProject with GUID resolvers pub fn parse_prefab_with_project(path: &Path, _project: &UnityProject) -> Result { // Read the file let content = std::fs::read_to_string(path)?; // Validate Unity header validate_unity_header(&content, path)?; // Parse raw documents let raw_documents = parse_raw_documents(&content, None)?; Ok(UnityPrefab::new( path.to_path_buf(), raw_documents, )) } /// Parse an asset file using pre-built GUID resolvers from a UnityProject /// /// # Arguments /// /// * `path` - Path to the asset file /// * `project` - Pre-initialized UnityProject with GUID resolvers pub fn parse_asset_with_project(path: &Path, _project: &UnityProject) -> Result { // Read the file let content = std::fs::read_to_string(path)?; // Validate Unity header validate_unity_header(&content, path)?; // Parse raw documents let raw_documents = parse_raw_documents(&content, None)?; Ok(UnityAsset::new( path.to_path_buf(), raw_documents, )) } /// Parse a prefab file into raw YAML documents fn parse_prefab(path: &Path, content: &str, type_filter: Option<&TypeFilter>) -> Result { let raw_documents = parse_raw_documents(content, type_filter)?; Ok(UnityFile::Prefab(UnityPrefab::new( path.to_path_buf(), raw_documents, ))) } /// Parse an asset file into raw YAML documents fn parse_asset(path: &Path, content: &str, type_filter: Option<&TypeFilter>) -> Result { let raw_documents = parse_raw_documents(content, type_filter)?; Ok(UnityFile::Asset(UnityAsset::new( path.to_path_buf(), raw_documents, ))) } /// Parse raw YAML documents from file content with optional type filtering fn parse_raw_documents(content: &str, type_filter: Option<&TypeFilter>) -> Result> { // Split into individual YAML documents let raw_docs = split_yaml_documents(content)?; // Parse each document raw_docs .iter() .filter_map(|raw| parse_raw_document(raw, type_filter).transpose()) .collect() } /// Parse a single raw YAML document into a RawDocument with optional type filtering fn parse_raw_document(raw_doc: &str, type_filter: Option<&TypeFilter>) -> Result> { // Parse the Unity tag line (e.g., "--- !u!1 &12345") let tag = match parse_unity_tag(raw_doc) { Some(tag) => tag, None => return Ok(None), // Skip documents without Unity tags }; // Extract the YAML content (everything after the tag line) let yaml_content = extract_yaml_content(raw_doc); if yaml_content.trim().is_empty() { return Ok(None); } // Early filtering: Extract class name without full YAML parsing if let Some(filter) = type_filter { if filter.is_filtering() { // Extract the class name efficiently let class_name = match extract_class_name(yaml_content) { Some(name) => name, None => return Ok(None), // Can't extract class name, skip }; // Check if this is a MonoBehaviour if class_name == "MonoBehaviour" { // For MonoBehaviour, we need to check the m_Script GUID match extract_monobehaviour_guid(yaml_content) { Some(guid) => { if !filter.should_parse_guid(&guid) { // GUID not in whitelist, skip this document return Ok(None); } } None => { // Can't extract GUID, skip this MonoBehaviour return Ok(None); } } } else { // For non-MonoBehaviour, check the Unity type whitelist if !filter.should_parse_type(class_name) { // Type not in whitelist, skip this document return Ok(None); } } } } // If we reach here, the document passed the filter (or no filter was applied) // Now do the full YAML parsing let yaml_value: serde_yaml::Value = serde_yaml::from_str(yaml_content)?; // Unity documents have format "GameObject: { ... }" // Extract class name and inner YAML let (class_name, inner_yaml) = match &yaml_value { serde_yaml::Value::Mapping(map) if map.len() == 1 => { // Single-key mapping - this is the standard Unity format let (key, value) = map.iter().next().unwrap(); let class_name = key .as_str() .ok_or_else(|| Error::invalid_format("Class name must be string"))? .to_string(); (class_name, value.clone()) } _ => { // Fallback for malformed documents let class_name = format!("UnityType{}", tag.type_id); (class_name, yaml_value) } }; Ok(Some(RawDocument::new( tag.type_id, FileID::from_i64(tag.file_id), class_name, inner_yaml, tag.is_stripped, ))) } /// Validate that the file has a proper Unity YAML header fn validate_unity_header(content: &str, path: &Path) -> Result<()> { let has_yaml_header = content.starts_with("%YAML"); let has_unity_tag = content.contains("%TAG !u! tag:unity3d.com"); if !has_yaml_header || !has_unity_tag { return Err(Error::MissingHeader(path.to_path_buf())); } Ok(()) } /// Extract the YAML content from a raw document (skip the Unity tag line) fn extract_yaml_content(raw_doc: &str) -> &str { // Find the first newline after the "--- !u!" tag if let Some(first_line_end) = raw_doc.find('\n') { &raw_doc[first_line_end + 1..] } else { "" } } /// Extract the Unity class name from YAML content without full parsing /// /// Unity documents have the format: /// ```yaml /// ClassName: /// field1: value1 /// field2: value2 /// ``` /// /// This function extracts "ClassName" efficiently without parsing the entire YAML. fn extract_class_name(yaml_content: &str) -> Option<&str> { // Find the first line that's not empty let first_line = yaml_content.lines().find(|line| !line.trim().is_empty())?; // Class name is the first non-whitespace text before ':' let class_name = first_line.trim().strip_suffix(':')?; Some(class_name) } /// Extract the m_Script GUID from a MonoBehaviour YAML document without full parsing /// /// MonoBehaviour documents have the format: /// ```yaml /// MonoBehaviour: /// m_Script: {fileID: 11500000, guid: d39ddbf1c2c3d1a4baa070e5e76548bd, type: 3} /// ... /// ``` /// /// Or multi-line format: /// ```yaml /// MonoBehaviour: /// m_Script: /// fileID: 11500000 /// guid: d39ddbf1c2c3d1a4baa070e5e76548bd /// type: 3 /// ``` /// /// This function extracts the GUID value efficiently. fn extract_monobehaviour_guid(yaml_content: &str) -> Option { // Look for any line with "guid: <32 hex chars>" // This works for both inline and multi-line formats for line in yaml_content.lines() { if line.contains("guid:") { // Find "guid: " and extract the 32-character hex string after it if let Some(guid_start) = line.find("guid:") { let after_guid = &line[guid_start + 5..].trim(); // Extract the hex string (32 characters) let guid_str: String = after_guid .chars() .take_while(|c| c.is_ascii_hexdigit()) .collect(); if guid_str.len() == 32 { return Guid::from_hex(&guid_str).ok(); } } } } None } #[cfg(test)] mod tests { use super::*; #[test] fn test_validate_unity_header() { let valid_content = "%YAML 1.1\n%TAG !u! tag:unity3d.com,2011:\n"; assert!(validate_unity_header(valid_content, Path::new("test.unity")).is_ok()); let invalid_content = "Not a Unity file"; assert!(validate_unity_header(invalid_content, Path::new("test.unity")).is_err()); } #[test] fn test_extract_yaml_content() { let raw_doc = "--- !u!1 &12345\nGameObject:\n m_Name: Test"; let content = extract_yaml_content(raw_doc); assert_eq!(content, "GameObject:\n m_Name: Test"); } #[test] fn test_detect_file_type() { assert!(matches!( detect_file_type(Path::new("test.unity")), FileType::Scene )); assert!(matches!( detect_file_type(Path::new("test.prefab")), FileType::Prefab )); assert!(matches!( detect_file_type(Path::new("test.asset")), FileType::Asset )); assert!(matches!( detect_file_type(Path::new("test.txt")), FileType::Unknown )); } #[test] fn test_parse_unity_file_filtered_accepts_matching_path() { use regex::Regex; let filter = Regex::new(r"Test").unwrap(); let path = Path::new("TestScene.unity"); // Should match and attempt to parse (will fail because file doesn't exist) let result = parse_unity_file_filtered(path, Some(&filter), None); assert!(result.is_err()); // Error should be IO error (file not found), not filter error match result { Err(e) => { let err_msg = e.to_string(); assert!( !err_msg.contains("does not match filter"), "Should not be a filter error, got: {}", err_msg ); } Ok(_) => panic!("Expected error for non-existent file"), } } #[test] fn test_parse_unity_file_filtered_rejects_non_matching_path() { use regex::Regex; let filter = Regex::new(r"Test").unwrap(); let path = Path::new("MainScene.unity"); // Should reject due to filter let result = parse_unity_file_filtered(path, Some(&filter), None); assert!(result.is_err()); // Error should be filter error match result { Err(e) => { let err_msg = e.to_string(); assert!( err_msg.contains("does not match filter"), "Expected filter error, got: {}", err_msg ); } Ok(_) => panic!("Expected filter error"), } } #[test] fn test_parse_unity_file_filtered_none_accepts_all() { let path = Path::new("AnyScene.unity"); // No filter should accept any path (will fail with IO error) let result = parse_unity_file_filtered(path, None, None); assert!(result.is_err()); // Should be IO error, not filter error match result { Err(e) => { let err_msg = e.to_string(); assert!( !err_msg.contains("does not match filter"), "Should not be a filter error with None filter, got: {}", err_msg ); } Ok(_) => panic!("Expected IO error for non-existent file"), } } #[test] fn test_parse_unity_file_uses_default_filter() { let path = Path::new("AnyScene.unity"); // parse_unity_file should work the same as filtered with None let result1 = parse_unity_file(path); let result2 = parse_unity_file_filtered(path, None, None); // Both should have the same error (IO error for missing file) assert!(result1.is_err()); assert!(result2.is_err()); } #[test] fn test_extract_class_name() { let yaml = "GameObject:\n m_Name: Test"; assert_eq!(extract_class_name(yaml), Some("GameObject")); let yaml2 = "Transform:\n m_LocalPosition: {x: 1, y: 2, z: 3}"; assert_eq!(extract_class_name(yaml2), Some("Transform")); let yaml3 = "MonoBehaviour:\n m_Script: {fileID: 11500000}"; assert_eq!(extract_class_name(yaml3), Some("MonoBehaviour")); let empty = ""; assert_eq!(extract_class_name(empty), None); } #[test] fn test_extract_monobehaviour_guid() { let yaml = "MonoBehaviour:\n m_Script: {fileID: 11500000, guid: d39ddbf1c2c3d1a4baa070e5e76548bd, type: 3}"; let guid = extract_monobehaviour_guid(yaml); assert!(guid.is_some()); assert_eq!( guid.unwrap().to_hex(), "d39ddbf1c2c3d1a4baa070e5e76548bd" ); // Multi-line format let yaml2 = "MonoBehaviour:\n m_Script:\n fileID: 11500000\n guid: abc123def456789012345678901234ab\n type: 3"; let guid2 = extract_monobehaviour_guid(yaml2); assert!(guid2.is_some()); assert_eq!( guid2.unwrap().to_hex(), "abc123def456789012345678901234ab" ); let no_guid = "MonoBehaviour:\n m_Name: Test"; assert_eq!(extract_monobehaviour_guid(no_guid), None); } #[test] fn test_type_filter_document_parse_all() { let filter = TypeFilter::parse_all(); assert!(filter.should_parse_type("Transform")); assert!(filter.should_parse_type("GameObject")); assert!(filter.should_parse_type("AnyType")); assert!(!filter.is_filtering()); } #[test] fn test_type_filter_document_with_unity_types() { use std::collections::HashSet; let mut types = HashSet::new(); types.insert("Transform".to_string()); types.insert("GameObject".to_string()); let filter = TypeFilter::with_unity_types(types); assert!(filter.should_parse_type("Transform")); assert!(filter.should_parse_type("GameObject")); assert!(!filter.should_parse_type("RectTransform")); assert!(filter.is_filtering()); // Should still accept any MonoBehaviour GUID since we didn't set a GUID filter let guid = Guid::from_hex("d39ddbf1c2c3d1a4baa070e5e76548bd").unwrap(); assert!(filter.should_parse_guid(&guid)); } #[test] fn test_type_filter_document_with_monobehaviour_guids() { use std::collections::HashSet; let mut guids = HashSet::new(); let guid1 = Guid::from_hex("d39ddbf1c2c3d1a4baa070e5e76548bd").unwrap(); let guid2 = Guid::from_hex("abc123def456789012345678901234ab").unwrap(); guids.insert(guid1); guids.insert(guid2); let filter = TypeFilter::with_monobehaviour_guids(guids); assert!(filter.should_parse_guid(&guid1)); assert!(filter.should_parse_guid(&guid2)); let guid3 = Guid::from_hex("00000000000000000000000000000000").unwrap(); assert!(!filter.should_parse_guid(&guid3)); assert!(filter.is_filtering()); // Should still accept any Unity type since we didn't set a type filter assert!(filter.should_parse_type("Transform")); assert!(filter.should_parse_type("AnyType")); } #[test] fn test_type_filter_document_with_both() { use std::collections::HashSet; let mut types = HashSet::new(); types.insert("Transform".to_string()); let mut guids = HashSet::new(); let guid1 = Guid::from_hex("d39ddbf1c2c3d1a4baa070e5e76548bd").unwrap(); guids.insert(guid1); let filter = TypeFilter::with_both(types, guids); // Only Transform should pass assert!(filter.should_parse_type("Transform")); assert!(!filter.should_parse_type("GameObject")); // Only guid1 should pass assert!(filter.should_parse_guid(&guid1)); let guid2 = Guid::from_hex("abc123def456789012345678901234ab").unwrap(); assert!(!filter.should_parse_guid(&guid2)); assert!(filter.is_filtering()); } }