Files
cursebreaker-parser-rust/unity-parser/src/parser/mod.rs
2026-01-11 03:03:39 +00:00

733 lines
24 KiB
Rust

//! Unity YAML parsing module
pub mod guid_resolver;
pub mod meta;
pub mod prefab_guid_resolver;
mod unity_tag;
mod yaml;
pub use guid_resolver::{find_project_root, GuidResolver};
pub use meta::{get_meta_path, MetaFile};
pub use prefab_guid_resolver::PrefabGuidResolver;
pub use unity_tag::{parse_unity_tag, UnityTag};
pub use yaml::split_yaml_documents;
use log::{info, warn};
use crate::model::{RawDocument, UnityAsset, UnityFile, UnityPrefab, UnityScene, UnityProject};
use crate::types::{FileID, Guid, TypeFilter};
use crate::{Error, Result};
use regex::Regex;
use std::path::Path;
/// Parse a Unity file from the given path
///
/// Automatically detects file type based on extension:
/// - .unity → UnityFile::Scene with ECS World
/// - .prefab → UnityFile::Prefab with raw YAML
/// - .asset → UnityFile::Asset with raw YAML
///
/// By default, parses all files. Use `parse_unity_file_filtered` for regex filtering.
///
/// # Example
///
/// ```no_run
/// use unity_parser::parser::parse_unity_file;
/// use unity_parser::UnityFile;
/// use std::path::Path;
///
/// let file = parse_unity_file(Path::new("Scene.unity"))?;
/// match file {
/// UnityFile::Scene(scene) => println!("Scene with {} entities", scene.entity_map.len()),
/// UnityFile::Prefab(prefab) => println!("Prefab with {} documents", prefab.documents.len()),
/// UnityFile::Asset(asset) => println!("Asset with {} documents", asset.documents.len()),
/// }
/// # Ok::<(), unity_parser::Error>(())
/// ```
pub fn parse_unity_file(path: &Path) -> Result<UnityFile> {
parse_unity_file_filtered(path, None, None)
}
/// Parse a Unity file with optional regex filtering and type filtering
///
/// Same as `parse_unity_file`, but allows filtering files by path pattern and Unity types.
/// If the path doesn't match the regex, returns an error.
///
/// # Arguments
///
/// * `path` - Path to the Unity file to parse
/// * `filter` - Optional regex to match against the file path. If None, parses all files (default behavior).
/// * `type_filter` - Optional filter for Unity types and MonoBehaviour GUIDs. If None, parses all types (default behavior).
///
/// # Example
///
/// ```no_run
/// use unity_parser::parser::{parse_unity_file_filtered};
/// use unity_parser::TypeFilter;
/// use regex::Regex;
/// use std::path::Path;
/// use std::collections::HashSet;
///
/// // Only parse files with "Test" in the name
/// let filter = Regex::new(r"Test").unwrap();
/// let file = parse_unity_file_filtered(Path::new("TestScene.unity"), Some(&filter), None)?;
///
/// // Only parse Transform and GameObject types
/// let mut types = HashSet::new();
/// types.insert("Transform".to_string());
/// types.insert("GameObject".to_string());
/// let type_filter = TypeFilter::with_unity_types(types);
/// let file2 = parse_unity_file_filtered(Path::new("Scene.unity"), None, Some(&type_filter))?;
/// # Ok::<(), unity_parser::Error>(())
/// ```
pub fn parse_unity_file_filtered(
path: &Path,
filter: Option<&Regex>,
type_filter: Option<&TypeFilter>,
) -> Result<UnityFile> {
// Apply filter if provided
if let Some(regex) = filter {
let path_str = path.to_str().ok_or_else(|| {
Error::invalid_format("Path contains invalid UTF-8")
})?;
if !regex.is_match(path_str) {
return Err(Error::invalid_format(format!(
"Path '{}' does not match filter pattern",
path.display()
)));
}
}
parse_unity_file_impl(path, type_filter)
}
/// Shared implementation behind the public `parse_unity_file*` entry points.
///
/// Reads the file, checks the Unity YAML header, then dispatches on the file
/// extension. The file is read and validated *before* the extension is
/// inspected, so I/O and header errors take precedence over the
/// "Unknown file extension" error.
fn parse_unity_file_impl(path: &Path, type_filter: Option<&TypeFilter>) -> Result<UnityFile> {
    let text = std::fs::read_to_string(path)?;
    validate_unity_header(&text, path)?;

    match detect_file_type(path) {
        FileType::Unknown => Err(Error::invalid_format(format!(
            "Unknown file extension: {}",
            path.display()
        ))),
        FileType::Asset => parse_asset(path, &text, type_filter),
        FileType::Prefab => parse_prefab(path, &text, type_filter),
        FileType::Scene => parse_scene(path, &text, type_filter),
    }
}
/// Kind of Unity file, as determined from a path's extension.
///
/// Derives the common value-type traits so that a detected kind can be
/// logged with `{:?}`, copied freely, and compared with `==` / `assert_eq!`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum FileType {
    /// A `.unity` scene file.
    Scene,
    /// A `.prefab` file.
    Prefab,
    /// A `.asset` file.
    Asset,
    /// Any other (or missing) extension.
    Unknown,
}
/// Map a path's extension to a `FileType`.
///
/// The comparison is exact (case-sensitive). A path with no extension, or
/// whose extension is not valid UTF-8, yields `FileType::Unknown`.
fn detect_file_type(path: &Path) -> FileType {
    // An empty string matches none of the known extensions, so it stands in
    // for both "no extension" and "extension is not UTF-8".
    let extension = path
        .extension()
        .and_then(|ext| ext.to_str())
        .unwrap_or("");

    match extension {
        "unity" => FileType::Scene,
        "prefab" => FileType::Prefab,
        "asset" => FileType::Asset,
        _ => FileType::Unknown,
    }
}
/// Parse scene content into an ECS world wrapped in `UnityFile::Scene`.
///
/// The raw documents are parsed first. Then the Unity project root is looked
/// up from `path`; if it is found, a script GUID resolver and a prefab GUID
/// resolver are built from it. A resolver that fails to build is logged as a
/// warning and replaced by `None`; a missing project root silently leaves both
/// resolvers as `None`. The world is then built with whatever resolvers exist.
fn parse_scene(path: &Path, content: &str, type_filter: Option<&TypeFilter>) -> Result<UnityFile> {
    let raw_documents = parse_raw_documents(content, type_filter)?;

    // `None` here means: not part of a Unity project, or project root not found.
    let project_root = find_project_root(path).ok();
    if let Some(root) = &project_root {
        info!("📦 Found Unity project root: {}", root.display());
    }

    // Script GUID resolver (best effort).
    let guid_resolver = project_root
        .as_ref()
        .and_then(|root| match GuidResolver::from_project(root) {
            Ok(resolver) => {
                info!("Script GUID resolver built ({} mappings)", resolver.len());
                Some(resolver)
            }
            Err(e) => {
                warn!("Failed to build script GUID resolver: {}", e);
                None
            }
        });

    // Prefab GUID resolver (best effort).
    let prefab_guid_resolver = project_root
        .as_ref()
        .and_then(|root| match PrefabGuidResolver::from_project(root) {
            Ok(resolver) => {
                info!("Prefab GUID resolver built ({} mappings)", resolver.len());
                Some(resolver)
            }
            Err(e) => {
                warn!("Failed to build prefab GUID resolver: {}", e);
                None
            }
        });

    let (world, entity_map) = crate::ecs::build_world_from_documents(
        raw_documents,
        guid_resolver.as_ref(),
        prefab_guid_resolver.as_ref(),
    )?;

    let scene = UnityScene::new(path.to_path_buf(), world, entity_map);
    Ok(UnityFile::Scene(scene))
}
/// Parse a scene file using pre-built GUID resolvers from a UnityProject
///
/// This is more efficient than `parse_scene` when parsing multiple scenes
/// because the GUID resolvers are already initialized.
///
/// # Arguments
///
/// * `path` - Path to the scene file
/// * `project` - Pre-initialized UnityProject with GUID resolvers
pub fn parse_scene_with_project(path: &Path, project: &UnityProject) -> Result<UnityScene> {
parse_scene_with_project_filtered(path, project, None)
}
/// Parse a scene file with a `UnityProject`'s resolvers and an optional type filter.
///
/// Reads the file, validates the Unity YAML header, parses the raw documents
/// (skipping those rejected by `type_filter`), and builds the ECS world using
/// `project.guid_resolver` and `project.prefab_resolver`.
///
/// # Arguments
///
/// * `path` - Path to the scene file
/// * `project` - Pre-initialized UnityProject with GUID resolvers
/// * `type_filter` - Optional filter for Unity types and MonoBehaviour script GUIDs.
///   `None` parses every document.
pub fn parse_scene_with_project_filtered(
    path: &Path,
    project: &UnityProject,
    type_filter: Option<&TypeFilter>,
) -> Result<UnityScene> {
    let text = std::fs::read_to_string(path)?;
    validate_unity_header(&text, path)?;

    let documents = parse_raw_documents(&text, type_filter)?;

    // Reuse the project's resolvers instead of scanning the project again.
    let (world, entity_map) = crate::ecs::build_world_from_documents(
        documents,
        Some(&project.guid_resolver),
        Some(&project.prefab_resolver),
    )?;

    let scene = UnityScene::new(path.to_path_buf(), world, entity_map);
    Ok(scene)
}
/// Parse a prefab file into its raw YAML documents.
///
/// Reads the file, validates the Unity YAML header, and parses every document
/// (no type filter is applied).
///
/// # Arguments
///
/// * `path` - Path to the prefab file
/// * `_project` - Pre-initialized UnityProject; currently unused by this function
pub fn parse_prefab_with_project(path: &Path, _project: &UnityProject) -> Result<UnityPrefab> {
    let text = std::fs::read_to_string(path)?;
    validate_unity_header(&text, path)?;

    let documents = parse_raw_documents(&text, None)?;
    let prefab = UnityPrefab::new(path.to_path_buf(), documents);
    Ok(prefab)
}
/// Parse an asset file into its raw YAML documents.
///
/// Reads the file, validates the Unity YAML header, and parses every document
/// (no type filter is applied).
///
/// # Arguments
///
/// * `path` - Path to the asset file
/// * `_project` - Pre-initialized UnityProject; currently unused by this function
pub fn parse_asset_with_project(path: &Path, _project: &UnityProject) -> Result<UnityAsset> {
    let text = std::fs::read_to_string(path)?;
    validate_unity_header(&text, path)?;

    let documents = parse_raw_documents(&text, None)?;
    let asset = UnityAsset::new(path.to_path_buf(), documents);
    Ok(asset)
}
/// Turn already-loaded prefab content into `UnityFile::Prefab`.
///
/// `content` is parsed into raw documents (honouring `type_filter`), which are
/// stored together with an owned copy of `path`.
fn parse_prefab(path: &Path, content: &str, type_filter: Option<&TypeFilter>) -> Result<UnityFile> {
    let documents = parse_raw_documents(content, type_filter)?;
    let prefab = UnityPrefab::new(path.to_path_buf(), documents);
    Ok(UnityFile::Prefab(prefab))
}
/// Turn already-loaded asset content into `UnityFile::Asset`.
///
/// `content` is parsed into raw documents (honouring `type_filter`), which are
/// stored together with an owned copy of `path`.
fn parse_asset(path: &Path, content: &str, type_filter: Option<&TypeFilter>) -> Result<UnityFile> {
    let documents = parse_raw_documents(content, type_filter)?;
    let asset = UnityAsset::new(path.to_path_buf(), documents);
    Ok(UnityFile::Asset(asset))
}
/// Split file content into YAML documents and parse each into a `RawDocument`.
///
/// Documents for which `parse_raw_document` returns `Ok(None)` (skipped or
/// filtered out) are omitted from the result. The first document that fails
/// to parse aborts the whole call with that error.
fn parse_raw_documents(content: &str, type_filter: Option<&TypeFilter>) -> Result<Vec<RawDocument>> {
    let raw_docs = split_yaml_documents(content)?;

    let mut documents = Vec::new();
    for raw in raw_docs.iter() {
        // `?` stops at the first failure; `None` means "skip this document".
        if let Some(document) = parse_raw_document(raw, type_filter)? {
            documents.push(document);
        }
    }
    Ok(documents)
}
/// Parse a single raw YAML document into a `RawDocument`, honouring an optional type filter.
///
/// Returns `Ok(None)` when the document is skipped, which happens when:
/// - the first line carries no Unity tag (e.g. `--- !u!1 &12345`);
/// - there is no YAML content after the tag line;
/// - an active `type_filter` rejects it: the class name cannot be extracted,
///   a non-MonoBehaviour type is not accepted by `should_parse_type`, or a
///   MonoBehaviour's script GUID is missing or not accepted by `should_parse_guid`.
///
/// The filter decision is made from cheap text scans *before* the full YAML
/// parse, so rejected documents never pay for deserialization.
///
/// # Errors
///
/// Returns an error if the YAML content fails to deserialize, or if the
/// document is a single-key mapping whose key is not a string.
fn parse_raw_document(raw_doc: &str, type_filter: Option<&TypeFilter>) -> Result<Option<RawDocument>> {
    // Parse the Unity tag line (e.g., "--- !u!1 &12345"); untagged documents are skipped.
    let tag = match parse_unity_tag(raw_doc) {
        Some(tag) => tag,
        None => return Ok(None),
    };

    // Everything after the tag line is the YAML payload.
    let yaml_content = extract_yaml_content(raw_doc);
    if yaml_content.trim().is_empty() {
        return Ok(None);
    }

    // Early filtering: decide from the raw text, without a full YAML parse.
    if let Some(filter) = type_filter {
        if filter.is_filtering() {
            let class_name = match extract_class_name(yaml_content) {
                Some(name) => name,
                None => return Ok(None), // Can't extract class name, skip
            };

            let wanted = if class_name == "MonoBehaviour" {
                // MonoBehaviours are selected by their script GUID; one without
                // an extractable GUID is skipped.
                match extract_monobehaviour_guid(yaml_content) {
                    Some(guid) => filter.should_parse_guid(&guid),
                    None => false,
                }
            } else {
                // Every other document is selected by its Unity type name.
                filter.should_parse_type(class_name)
            };

            if !wanted {
                return Ok(None);
            }
        }
    }

    // The document passed the filter (or no filter was applied): parse it fully.
    let yaml_value: serde_yaml::Value = serde_yaml::from_str(yaml_content)?;

    // Unity documents have the format "GameObject: { ... }". Match by value so
    // the inner YAML tree is moved out of the mapping rather than deep-cloned.
    let (class_name, inner_yaml) = match yaml_value {
        serde_yaml::Value::Mapping(map) if map.len() == 1 => {
            let (key, value) = map
                .into_iter()
                .next()
                .expect("mapping was just checked to contain exactly one entry");
            let class_name = key
                .as_str()
                .ok_or_else(|| Error::invalid_format("Class name must be string"))?
                .to_string();
            (class_name, value)
        }
        // Fallback for malformed documents: synthesize a name from the type id
        // and keep the whole value.
        other => (format!("UnityType{}", tag.type_id), other),
    };

    Ok(Some(RawDocument::new(
        tag.type_id,
        FileID::from_i64(tag.file_id),
        class_name,
        inner_yaml,
        tag.is_stripped,
    )))
}
/// Validate that the file has a proper Unity YAML header
fn validate_unity_header(content: &str, path: &Path) -> Result<()> {
let has_yaml_header = content.starts_with("%YAML");
let has_unity_tag = content.contains("%TAG !u! tag:unity3d.com");
if !has_yaml_header || !has_unity_tag {
return Err(Error::MissingHeader(path.to_path_buf()));
}
Ok(())
}
/// Return the part of a raw document that follows its first line.
///
/// The first line is expected to be the Unity tag line (`--- !u!...`). If the
/// document contains no newline at all, there is no content and the empty
/// string is returned.
fn extract_yaml_content(raw_doc: &str) -> &str {
    match raw_doc.split_once('\n') {
        Some((_tag_line, rest)) => rest,
        None => "",
    }
}
/// Read the Unity class name from the top of a document without parsing YAML.
///
/// Unity documents have the format:
/// ```yaml
/// ClassName:
///   field1: value1
///   field2: value2
/// ```
///
/// Only the first non-blank line is examined: it is trimmed and must end with
/// `:`; the text before that colon is returned. Returns `None` when the
/// content has no non-blank line or that line does not end with `:`.
fn extract_class_name(yaml_content: &str) -> Option<&str> {
    yaml_content
        .lines()
        .map(str::trim)
        .find(|line| !line.is_empty())
        .and_then(|line| line.strip_suffix(':'))
}
/// Extract the `m_Script` GUID from a MonoBehaviour YAML document without full parsing
///
/// MonoBehaviour documents have the format:
/// ```yaml
/// MonoBehaviour:
///   m_Script: {fileID: 11500000, guid: d39ddbf1c2c3d1a4baa070e5e76548bd, type: 3}
///   ...
/// ```
///
/// Or multi-line format:
/// ```yaml
/// MonoBehaviour:
///   m_Script:
///     fileID: 11500000
///     guid: d39ddbf1c2c3d1a4baa070e5e76548bd
///     type: 3
/// ```
///
/// The search is anchored on the `m_Script:` key. Other references that may
/// appear earlier in the document and carry their own `guid:` (for example
/// `m_CorrespondingSourceObject`) are ignored, so the returned value is the
/// script GUID rather than whichever GUID happens to come first.
///
/// Returns `None` when there is no `m_Script:` line, when that reference has
/// no `guid:` followed by exactly 32 hex digits, or when `Guid::from_hex`
/// rejects the digits.
fn extract_monobehaviour_guid(yaml_content: &str) -> Option<Guid> {
    /// Number of hex digits in a Unity GUID.
    const GUID_HEX_LEN: usize = 32;
    /// Keys that make up a Unity object reference in multi-line form.
    const REFERENCE_KEYS: [&str; 3] = ["fileID:", "guid:", "type:"];

    /// Returns the 32-digit hex run that follows the first `guid:` on `line`.
    fn guid_hex(line: &str) -> Option<&str> {
        let (_, rest) = line.split_once("guid:")?;
        let rest = rest.trim_start();
        // Hex digits are ASCII, so the byte offset of the first non-hex char
        // is also the number of hex digits.
        let hex_len = rest
            .find(|c: char| !c.is_ascii_hexdigit())
            .unwrap_or(rest.len());
        if hex_len == GUID_HEX_LEN {
            Some(&rest[..hex_len])
        } else {
            None
        }
    }

    /// True for blank lines and for lines holding one field of an object reference.
    fn is_reference_field(line: &str) -> bool {
        let field = line.trim_start();
        field.is_empty() || REFERENCE_KEYS.iter().any(|key| field.starts_with(*key))
    }

    let mut lines = yaml_content.lines();
    let script_line = lines
        .by_ref()
        .find(|line| line.trim_start().starts_with("m_Script:"))?;

    // Inline form: the GUID sits on the `m_Script:` line itself.
    if let Some(hex) = guid_hex(script_line) {
        return Guid::from_hex(hex).ok();
    }

    // Multi-line form: the reference's fields follow on their own lines. Stop
    // at the first line that is not one of those fields so that a GUID
    // belonging to a later, unrelated key is never picked up.
    lines
        .take_while(|line| is_reference_field(line))
        .find_map(guid_hex)
        .and_then(|hex| Guid::from_hex(hex).ok())
}
#[cfg(test)]
mod tests {
    use super::*;
    use regex::Regex;
    use std::collections::HashSet;

    // GUIDs reused across the filter tests, as 32-digit hex strings.
    const SCRIPT_GUID_HEX: &str = "d39ddbf1c2c3d1a4baa070e5e76548bd";
    const OTHER_GUID_HEX: &str = "abc123def456789012345678901234ab";
    const ZERO_GUID_HEX: &str = "00000000000000000000000000000000";

    /// Build a `Guid` from a hex string that is known to be valid.
    fn guid(hex: &str) -> Guid {
        Guid::from_hex(hex).unwrap()
    }

    /// Build an owned set of type names from string literals.
    fn type_set(names: &[&str]) -> HashSet<String> {
        let mut set = HashSet::new();
        for name in names {
            set.insert(name.to_string());
        }
        set
    }

    /// Assert that a parse attempt failed and return the error's display text.
    ///
    /// `when_ok` is the panic message used if the parse unexpectedly succeeded.
    fn error_text(result: Result<UnityFile>, when_ok: &str) -> String {
        assert!(result.is_err());
        match result {
            Err(error) => error.to_string(),
            Ok(_) => panic!("{}", when_ok),
        }
    }

    /// Detect the file type for a bare file name.
    fn kind_of(file_name: &str) -> FileType {
        detect_file_type(Path::new(file_name))
    }

    #[test]
    fn test_validate_unity_header() {
        let path = Path::new("test.unity");

        let with_header = "%YAML 1.1\n%TAG !u! tag:unity3d.com,2011:\n";
        assert!(validate_unity_header(with_header, path).is_ok());

        let without_header = "Not a Unity file";
        assert!(validate_unity_header(without_header, path).is_err());
    }

    #[test]
    fn test_extract_yaml_content() {
        let document = "--- !u!1 &12345\nGameObject:\n m_Name: Test";
        assert_eq!(extract_yaml_content(document), "GameObject:\n m_Name: Test");
    }

    #[test]
    fn test_detect_file_type() {
        assert!(matches!(kind_of("test.unity"), FileType::Scene));
        assert!(matches!(kind_of("test.prefab"), FileType::Prefab));
        assert!(matches!(kind_of("test.asset"), FileType::Asset));
        assert!(matches!(kind_of("test.txt"), FileType::Unknown));
    }

    #[test]
    fn test_parse_unity_file_filtered_accepts_matching_path() {
        let pattern = Regex::new(r"Test").unwrap();

        // The path matches, so parsing is attempted and fails only because the
        // file does not exist; the failure must not come from the filter.
        let outcome =
            parse_unity_file_filtered(Path::new("TestScene.unity"), Some(&pattern), None);
        let message = error_text(outcome, "Expected error for non-existent file");
        assert!(
            !message.contains("does not match filter"),
            "Should not be a filter error, got: {}",
            message
        );
    }

    #[test]
    fn test_parse_unity_file_filtered_rejects_non_matching_path() {
        let pattern = Regex::new(r"Test").unwrap();

        // The path does not match, so the filter itself must reject it.
        let outcome =
            parse_unity_file_filtered(Path::new("MainScene.unity"), Some(&pattern), None);
        let message = error_text(outcome, "Expected filter error");
        assert!(
            message.contains("does not match filter"),
            "Expected filter error, got: {}",
            message
        );
    }

    #[test]
    fn test_parse_unity_file_filtered_none_accepts_all() {
        // Without a filter every path is accepted; the missing file then
        // produces an I/O error rather than a filter error.
        let outcome = parse_unity_file_filtered(Path::new("AnyScene.unity"), None, None);
        let message = error_text(outcome, "Expected IO error for non-existent file");
        assert!(
            !message.contains("does not match filter"),
            "Should not be a filter error with None filter, got: {}",
            message
        );
    }

    #[test]
    fn test_parse_unity_file_uses_default_filter() {
        let path = Path::new("AnyScene.unity");

        // The unfiltered entry point and the filtered one with no filters
        // should both fail the same way for a missing file.
        let via_default = parse_unity_file(path);
        let via_filtered = parse_unity_file_filtered(path, None, None);

        assert!(via_default.is_err());
        assert!(via_filtered.is_err());
    }

    #[test]
    fn test_extract_class_name() {
        assert_eq!(
            extract_class_name("GameObject:\n m_Name: Test"),
            Some("GameObject")
        );
        assert_eq!(
            extract_class_name("Transform:\n m_LocalPosition: {x: 1, y: 2, z: 3}"),
            Some("Transform")
        );
        assert_eq!(
            extract_class_name("MonoBehaviour:\n m_Script: {fileID: 11500000}"),
            Some("MonoBehaviour")
        );
        assert_eq!(extract_class_name(""), None);
    }

    #[test]
    fn test_extract_monobehaviour_guid() {
        // Inline reference.
        let inline = "MonoBehaviour:\n m_Script: {fileID: 11500000, guid: d39ddbf1c2c3d1a4baa070e5e76548bd, type: 3}";
        let found_inline = extract_monobehaviour_guid(inline);
        assert!(found_inline.is_some());
        assert_eq!(found_inline.unwrap().to_hex(), SCRIPT_GUID_HEX);

        // Multi-line format
        let multi_line = "MonoBehaviour:\n m_Script:\n fileID: 11500000\n guid: abc123def456789012345678901234ab\n type: 3";
        let found_multi_line = extract_monobehaviour_guid(multi_line);
        assert!(found_multi_line.is_some());
        assert_eq!(found_multi_line.unwrap().to_hex(), OTHER_GUID_HEX);

        // No GUID anywhere in the document.
        let without_guid = "MonoBehaviour:\n m_Name: Test";
        assert_eq!(extract_monobehaviour_guid(without_guid), None);
    }

    #[test]
    fn test_type_filter_document_parse_all() {
        let filter = TypeFilter::parse_all();

        for type_name in ["Transform", "GameObject", "AnyType"].iter() {
            assert!(filter.should_parse_type(type_name));
        }
        assert!(!filter.is_filtering());
    }

    #[test]
    fn test_type_filter_document_with_unity_types() {
        let filter = TypeFilter::with_unity_types(type_set(&["Transform", "GameObject"]));

        assert!(filter.should_parse_type("Transform"));
        assert!(filter.should_parse_type("GameObject"));
        assert!(!filter.should_parse_type("RectTransform"));
        assert!(filter.is_filtering());

        // Should still accept any MonoBehaviour GUID since we didn't set a GUID filter
        let any_script = guid(SCRIPT_GUID_HEX);
        assert!(filter.should_parse_guid(&any_script));
    }

    #[test]
    fn test_type_filter_document_with_monobehaviour_guids() {
        let first = guid(SCRIPT_GUID_HEX);
        let second = guid(OTHER_GUID_HEX);

        let mut allowed = HashSet::new();
        allowed.insert(first);
        allowed.insert(second);
        let filter = TypeFilter::with_monobehaviour_guids(allowed);

        assert!(filter.should_parse_guid(&first));
        assert!(filter.should_parse_guid(&second));

        let unlisted = guid(ZERO_GUID_HEX);
        assert!(!filter.should_parse_guid(&unlisted));
        assert!(filter.is_filtering());

        // Should still accept any Unity type since we didn't set a type filter
        assert!(filter.should_parse_type("Transform"));
        assert!(filter.should_parse_type("AnyType"));
    }

    #[test]
    fn test_type_filter_document_with_both() {
        let allowed_script = guid(SCRIPT_GUID_HEX);
        let mut allowed_guids = HashSet::new();
        allowed_guids.insert(allowed_script);

        let filter = TypeFilter::with_both(type_set(&["Transform"]), allowed_guids);

        // Only Transform should pass
        assert!(filter.should_parse_type("Transform"));
        assert!(!filter.should_parse_type("GameObject"));

        // Only the listed GUID should pass
        assert!(filter.should_parse_guid(&allowed_script));
        let other_script = guid(OTHER_GUID_HEX);
        assert!(!filter.should_parse_guid(&other_script));

        assert!(filter.is_filtering());
    }
}