This commit is contained in:
2026-01-07 09:29:03 +00:00
parent be061cb3a4
commit 2efa1aa86d
16 changed files with 1517 additions and 12 deletions

View File

@@ -139,6 +139,17 @@ pub mod yaml_helpers {
.map(String::from)
}
/// Look up `key` in a YAML mapping and return its string value interned.
///
/// The raw value is handed to `crate::types::intern_string`, which strips
/// Unity duplicate suffixes before interning, so the returned text can be
/// shorter than what is stored in the YAML. Intended for GameObject names
/// and other frequently duplicated strings.
///
/// Returns `None` when the key is missing or its value is not a string.
pub fn get_interned_string(map: &Mapping, key: &str) -> Option<std::sync::Arc<str>> {
    let lookup_key = Value::String(key.to_string());
    let raw = map.get(&lookup_key)?.as_str()?;
    Some(crate::types::intern_string(raw))
}
/// Get an i64 value from a YAML mapping
pub fn get_i64(map: &Mapping, key: &str) -> Option<i64> {
map.get(&Value::String(key.to_string()))

View File

@@ -8,6 +8,7 @@ mod component;
mod guid;
mod ids;
mod reference;
mod string_interner;
mod type_filter;
mod type_registry;
mod unity_types;
@@ -20,6 +21,7 @@ pub use component::{
pub use guid::Guid;
pub use ids::{FileID, LocalID};
pub use reference::UnityReference;
pub use string_interner::intern_string;
pub use type_filter::TypeFilter;
pub use type_registry::{get_class_name, get_type_id};
pub use unity_types::{

View File

@@ -0,0 +1,233 @@
//! String interning system for GameObject names
//!
//! This module provides a global string interner that deduplicates GameObject names
//! to reduce memory usage. It also cleans Unity-generated duplicate suffixes like
//! " copy", " (1)", " (2)", etc.
use once_cell::sync::Lazy;
use regex::Regex;
use std::collections::HashSet;
use std::sync::{Arc, Mutex};
/// String interner for deduplicating GameObject names.
///
/// Each distinct string is stored once as an `Arc<str>` in a mutex-guarded
/// `HashSet`; callers are handed clones of that `Arc`, so equal names share
/// a single allocation.
#[derive(Debug)]
pub struct StringInterner {
    /// Every string interned so far. The mutex serialises lookups and inserts.
    strings: Mutex<HashSet<Arc<str>>>,
}
impl StringInterner {
    /// Create a new, empty string interner.
    fn new() -> Self {
        Self {
            strings: Mutex::new(HashSet::new()),
        }
    }

    /// Intern a string, cleaning Unity duplicate suffixes first.
    ///
    /// This function:
    /// 1. Removes Unity duplicate suffixes (" copy", " (1)", " (2)", etc.)
    /// 2. Checks if the cleaned string is already interned
    /// 3. Returns an `Arc` to the interned string
    ///
    /// The returned text is the *cleaned* string, which may differ from `s`.
    ///
    /// A poisoned mutex is recovered instead of panicking: the set is only
    /// ever changed by a single `insert`, so a panic on another thread while
    /// the lock was held cannot leave it half-updated.
    ///
    /// # Examples
    /// ```
    /// # use unity_parser::intern_string;
    /// let s1 = intern_string("Player");
    /// let s2 = intern_string("Player (1)");
    /// let s3 = intern_string("Player copy");
    ///
    /// // All three point to the same interned string
    /// assert!(std::sync::Arc::ptr_eq(&s1, &s2));
    /// assert!(std::sync::Arc::ptr_eq(&s1, &s3));
    /// ```
    pub fn intern(&self, s: &str) -> Arc<str> {
        // Clean the suffix before taking the lock so the critical section
        // covers only the lookup and the insert.
        let cleaned = Self::clean_suffix(s);

        // Take the guard even if the mutex is poisoned; see the doc comment.
        let mut strings = self
            .strings
            .lock()
            .unwrap_or_else(std::sync::PoisonError::into_inner);

        // Reuse the existing allocation when this string is already known.
        if let Some(existing) = strings.get(cleaned) {
            return Arc::clone(existing);
        }

        // First occurrence: allocate once, keep one handle in the set and
        // return the other.
        let interned: Arc<str> = Arc::from(cleaned);
        strings.insert(Arc::clone(&interned));
        interned
    }

    /// Clean Unity duplicate suffixes from a string.
    ///
    /// Removes suffixes like:
    /// - " copy" (case insensitive)
    /// - " (1)", " (2)", " (123)", etc.
    ///
    /// A suffix only matches when it is preceded by at least one whitespace
    /// character and sits at the very end of the string; the whole run of
    /// whitespace before it is removed too. Multiple suffixes are removed
    /// iteratively. An input that consists of nothing but such suffixes
    /// (for example `" (1)"`) is reduced to the empty string.
    ///
    /// The result is a sub-slice of `s`; nothing is allocated.
    ///
    /// # Examples
    /// ```ignore
    /// assert_eq!(clean_suffix("GameObject copy"), "GameObject");
    /// assert_eq!(clean_suffix("GameObject (1)"), "GameObject");
    /// assert_eq!(clean_suffix("GameObject (1) (2)"), "GameObject");
    /// ```
    fn clean_suffix(s: &str) -> &str {
        static SUFFIX_REGEX: Lazy<Regex> = Lazy::new(|| {
            // Matches, anchored at the end of the string:
            // - " copy" (case insensitive)
            // - " (1)", " (2)", " (123)", etc.
            Regex::new(r"(?i)\s+copy$|\s+\(\d+\)$")
                .expect("duplicate-suffix pattern is a valid constant regex")
        });

        let mut cleaned = s;
        // Keep stripping suffixes until none remain.
        // Example: "GameObject (1) (2)" -> "GameObject"
        while let Some(mat) = SUFFIX_REGEX.find(cleaned) {
            cleaned = &cleaned[..mat.start()];
        }
        cleaned
    }
}
/// Global string interner instance used by `intern_string`.
///
/// Wrapped in `Lazy` so the interner is built by `StringInterner::new` on
/// first use rather than at program start.
static STRING_INTERNER: Lazy<StringInterner> = Lazy::new(StringInterner::new);
/// Intern a string (public API)
///
/// This is the main entry point for string interning. It forwards to the
/// global `STRING_INTERNER`, which cleans Unity duplicate suffixes and
/// returns an `Arc` to the interned string. The returned text is the cleaned
/// name, so it can differ from the `s` that was passed in.
///
/// # Examples
/// ```
/// # use unity_parser::intern_string;
/// let name1 = intern_string("Player");
/// let name2 = intern_string("Player (1)");
///
/// // Both names point to the same interned string
/// assert!(std::sync::Arc::ptr_eq(&name1, &name2));
/// assert_eq!(name1.as_ref(), "Player");
/// ```
pub fn intern_string(s: &str) -> Arc<str> {
STRING_INTERNER.intern(s)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// " copy" is stripped regardless of letter case or whitespace width.
    #[test]
    fn test_clean_suffix_copy() {
        assert_eq!(StringInterner::clean_suffix("GameObject copy"), "GameObject");
        assert_eq!(StringInterner::clean_suffix("GameObject Copy"), "GameObject");
        assert_eq!(StringInterner::clean_suffix("GameObject COPY"), "GameObject");
        // Two spaces before the suffix: the whole whitespace run is removed.
        assert_eq!(
            StringInterner::clean_suffix("GameObject  copy"),
            "GameObject"
        );
    }

    /// " (N)" is stripped for any number of digits or whitespace width.
    #[test]
    fn test_clean_suffix_numbered() {
        assert_eq!(StringInterner::clean_suffix("GameObject (1)"), "GameObject");
        assert_eq!(StringInterner::clean_suffix("GameObject (2)"), "GameObject");
        assert_eq!(
            StringInterner::clean_suffix("GameObject (123)"),
            "GameObject"
        );
        // Two spaces before the suffix: the whole whitespace run is removed.
        assert_eq!(
            StringInterner::clean_suffix("GameObject  (1)"),
            "GameObject"
        );
    }

    /// Stacked suffixes are peeled off one after another.
    #[test]
    fn test_clean_suffix_multiple() {
        assert_eq!(
            StringInterner::clean_suffix("GameObject (1) (2)"),
            "GameObject"
        );
        assert_eq!(
            StringInterner::clean_suffix("GameObject copy (1)"),
            "GameObject"
        );
        assert_eq!(
            StringInterner::clean_suffix("GameObject (1) copy"),
            "GameObject"
        );
    }

    /// Inputs that must come back untouched.
    #[test]
    fn test_clean_suffix_no_match() {
        assert_eq!(StringInterner::clean_suffix("GameObject"), "GameObject");
        // No leading whitespace, so these are names rather than suffixes.
        assert_eq!(StringInterner::clean_suffix("copy"), "copy");
        assert_eq!(StringInterner::clean_suffix("(1)"), "(1)");
        assert_eq!(
            StringInterner::clean_suffix("GameObject(1)"),
            "GameObject(1)"
        ); // No space
        assert_eq!(
            StringInterner::clean_suffix("GameObject copy suffix"),
            "GameObject copy suffix"
        ); // Not at end
    }

    /// Interning the same text twice yields the same allocation.
    #[test]
    fn test_intern_deduplication() {
        let s1 = intern_string("TestString");
        let s2 = intern_string("TestString");
        // Same Arc pointer (deduplicated)
        assert!(Arc::ptr_eq(&s1, &s2));
        assert_eq!(s1.as_ref(), "TestString");
    }

    /// Suffixed variants collapse onto the base name's allocation.
    #[test]
    fn test_intern_with_suffix_cleaning() {
        let s1 = intern_string("GameObject");
        let s2 = intern_string("GameObject (1)");
        let s3 = intern_string("GameObject copy");
        // All should point to the same interned string
        assert!(Arc::ptr_eq(&s1, &s2));
        assert!(Arc::ptr_eq(&s1, &s3));
        assert_eq!(s1.as_ref(), "GameObject");
    }

    /// Distinct names stay distinct (a trailing digit is not a suffix).
    #[test]
    fn test_intern_different_strings() {
        let s1 = intern_string("GameObject1");
        let s2 = intern_string("GameObject2");
        // Different pointers (different strings)
        assert!(!Arc::ptr_eq(&s1, &s2));
        assert_eq!(s1.as_ref(), "GameObject1");
        assert_eq!(s2.as_ref(), "GameObject2");
    }

    /// The empty string can be interned and round-trips unchanged.
    #[test]
    fn test_empty_string() {
        let s = intern_string("");
        assert_eq!(s.as_ref(), "");
    }

    /// A long mixed chain of suffixes is fully removed.
    #[test]
    fn test_multiple_suffixes_complex() {
        let s = intern_string("Player (1) (2) copy (3)");
        assert_eq!(s.as_ref(), "Player");
    }

    /// Different whitespace widths before a suffix all clean to one name.
    #[test]
    fn test_whitespace_variations() {
        let s1 = intern_string("Object  copy"); // Double space
        let s2 = intern_string("Object copy"); // Single space
        let s3 = intern_string("Object   (1)"); // Triple space
        // All should be deduplicated to "Object"
        assert!(Arc::ptr_eq(&s1, &s2));
        assert!(Arc::ptr_eq(&s1, &s3));
        assert_eq!(s1.as_ref(), "Object");
    }
}

View File

@@ -2,13 +2,14 @@
use crate::types::{yaml_helpers, ComponentContext, UnityComponent};
use sparsey::Entity;
use std::sync::Arc;
/// A GameObject component
///
/// GameObjects are the fundamental objects in Unity that represent entities in a scene.
#[derive(Debug, Clone)]
pub struct GameObject {
pub name: String,
pub name: Arc<str>,
pub is_active: bool,
pub layer: i64,
pub tag: i64,
@@ -19,7 +20,8 @@ impl UnityComponent for GameObject {
///
/// Note: Caller is responsible for ensuring this is called on the correct document type.
fn parse(yaml: &serde_yaml::Mapping, _ctx: &ComponentContext) -> Option<Self> {
let name = yaml_helpers::get_string(yaml, "m_Name").unwrap_or_default();
let name = yaml_helpers::get_interned_string(yaml, "m_Name")
.unwrap_or_else(|| Arc::from(""));
let is_active = yaml_helpers::get_bool(yaml, "m_IsActive").unwrap_or(true);

View File

@@ -811,7 +811,7 @@ mod tests {
documents: Vec::new(),
file_id_map: HashMap::new(),
overrides: HashMap::new(),
next_file_id: i64::MAX,
next_file_id: Arc::new(Cell::new(i64::MAX)),
source_path: PathBuf::from("test.prefab"),
};
@@ -829,7 +829,7 @@ mod tests {
documents: Vec::new(),
file_id_map: HashMap::new(),
overrides: HashMap::new(),
next_file_id: i64::MAX,
next_file_id: Arc::new(Cell::new(i64::MAX)),
source_path: PathBuf::from("test.prefab"),
};