items DB
This commit is contained in:
@@ -139,6 +139,17 @@ pub mod yaml_helpers {
|
||||
.map(String::from)
|
||||
}
|
||||
|
||||
/// Get an interned string value from a YAML mapping
|
||||
///
|
||||
/// This version interns the string for memory efficiency,
|
||||
/// stripping Unity duplicate suffixes before interning.
|
||||
/// Use this for GameObject names and other frequently duplicated strings.
|
||||
pub fn get_interned_string(map: &Mapping, key: &str) -> Option<std::sync::Arc<str>> {
|
||||
map.get(&Value::String(key.to_string()))
|
||||
.and_then(|v| v.as_str())
|
||||
.map(|s| crate::types::intern_string(s))
|
||||
}
|
||||
|
||||
/// Get an i64 value from a YAML mapping
|
||||
pub fn get_i64(map: &Mapping, key: &str) -> Option<i64> {
|
||||
map.get(&Value::String(key.to_string()))
|
||||
|
||||
@@ -8,6 +8,7 @@ mod component;
|
||||
mod guid;
|
||||
mod ids;
|
||||
mod reference;
|
||||
mod string_interner;
|
||||
mod type_filter;
|
||||
mod type_registry;
|
||||
mod unity_types;
|
||||
@@ -20,6 +21,7 @@ pub use component::{
|
||||
pub use guid::Guid;
|
||||
pub use ids::{FileID, LocalID};
|
||||
pub use reference::UnityReference;
|
||||
pub use string_interner::intern_string;
|
||||
pub use type_filter::TypeFilter;
|
||||
pub use type_registry::{get_class_name, get_type_id};
|
||||
pub use unity_types::{
|
||||
|
||||
233
unity-parser/src/types/string_interner.rs
Normal file
233
unity-parser/src/types/string_interner.rs
Normal file
@@ -0,0 +1,233 @@
|
||||
//! String interning system for GameObject names
|
||||
//!
|
||||
//! This module provides a global string interner that deduplicates GameObject names
|
||||
//! to reduce memory usage. It also cleans Unity-generated duplicate suffixes like
|
||||
//! " copy", " (1)", " (2)", etc.
|
||||
|
||||
use once_cell::sync::Lazy;
|
||||
use regex::Regex;
|
||||
use std::collections::HashSet;
|
||||
use std::sync::{Arc, Mutex};
|
||||
|
||||
/// Global string interner for deduplicating GameObject names.
///
/// Stores each distinct string once as an `Arc<str>`; callers receive cheap
/// clones of that shared allocation. The set is guarded by a mutex so the
/// interner can live in a `static` and be used from several threads.
#[derive(Debug)]
pub struct StringInterner {
    /// Every distinct (already cleaned) string interned so far.
    strings: Mutex<HashSet<Arc<str>>>,
}
|
||||
|
||||
impl StringInterner {
|
||||
/// Create a new string interner
|
||||
fn new() -> Self {
|
||||
Self {
|
||||
strings: Mutex::new(HashSet::new()),
|
||||
}
|
||||
}
|
||||
|
||||
/// Intern a string, cleaning Unity duplicate suffixes first
|
||||
///
|
||||
/// This function:
|
||||
/// 1. Removes Unity duplicate suffixes (" copy", " (1)", " (2)", etc.)
|
||||
/// 2. Checks if the cleaned string is already interned
|
||||
/// 3. Returns an Arc to the interned string
|
||||
///
|
||||
/// # Examples
|
||||
/// ```
|
||||
/// # use unity_parser::intern_string;
|
||||
/// let s1 = intern_string("Player");
|
||||
/// let s2 = intern_string("Player (1)");
|
||||
/// let s3 = intern_string("Player copy");
|
||||
///
|
||||
/// // All three point to the same interned string
|
||||
/// assert!(std::sync::Arc::ptr_eq(&s1, &s2));
|
||||
/// assert!(std::sync::Arc::ptr_eq(&s1, &s3));
|
||||
/// ```
|
||||
pub fn intern(&self, s: &str) -> Arc<str> {
|
||||
// Clean the suffix first
|
||||
let cleaned = Self::clean_suffix(s);
|
||||
|
||||
// Lock the interner
|
||||
let mut strings = self.strings.lock().unwrap();
|
||||
|
||||
// Check if we already have this string
|
||||
if let Some(existing) = strings.get(cleaned) {
|
||||
return Arc::clone(existing);
|
||||
}
|
||||
|
||||
// Create a new interned string
|
||||
let interned: Arc<str> = Arc::from(cleaned);
|
||||
strings.insert(Arc::clone(&interned));
|
||||
interned
|
||||
}
|
||||
|
||||
/// Clean Unity duplicate suffixes from a string
|
||||
///
|
||||
/// Removes suffixes like:
|
||||
/// - " copy" (case insensitive)
|
||||
/// - " (1)", " (2)", " (123)", etc.
|
||||
///
|
||||
/// Multiple suffixes are removed iteratively.
|
||||
///
|
||||
/// # Examples
|
||||
/// ```ignore
|
||||
/// assert_eq!(clean_suffix("GameObject copy"), "GameObject");
|
||||
/// assert_eq!(clean_suffix("GameObject (1)"), "GameObject");
|
||||
/// assert_eq!(clean_suffix("GameObject (1) (2)"), "GameObject");
|
||||
/// ```
|
||||
fn clean_suffix(s: &str) -> &str {
|
||||
static SUFFIX_REGEX: Lazy<Regex> = Lazy::new(|| {
|
||||
// Matches:
|
||||
// - " copy" (case insensitive)
|
||||
// - " (1)", " (2)", " (123)", etc.
|
||||
// At the end of the string
|
||||
Regex::new(r"(?i)\s+copy$|\s+\(\d+\)$").unwrap()
|
||||
});
|
||||
|
||||
let mut cleaned = s;
|
||||
|
||||
// Keep stripping suffixes until none remain
|
||||
// Example: "GameObject (1) (2)" -> "GameObject"
|
||||
while let Some(mat) = SUFFIX_REGEX.find(cleaned) {
|
||||
cleaned = &cleaned[..mat.start()];
|
||||
}
|
||||
|
||||
cleaned
|
||||
}
|
||||
}
|
||||
|
||||
/// Global string interner instance
|
||||
static STRING_INTERNER: Lazy<StringInterner> = Lazy::new(StringInterner::new);
|
||||
|
||||
/// Intern a string (public API)
|
||||
///
|
||||
/// This is the main entry point for string interning. It cleans Unity duplicate
|
||||
/// suffixes and returns an Arc to the interned string.
|
||||
///
|
||||
/// # Examples
|
||||
/// ```
|
||||
/// # use unity_parser::intern_string;
|
||||
/// let name1 = intern_string("Player");
|
||||
/// let name2 = intern_string("Player (1)");
|
||||
///
|
||||
/// // Both names point to the same interned string
|
||||
/// assert!(std::sync::Arc::ptr_eq(&name1, &name2));
|
||||
/// assert_eq!(name1.as_ref(), "Player");
|
||||
/// ```
|
||||
pub fn intern_string(s: &str) -> Arc<str> {
|
||||
STRING_INTERNER.intern(s)
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// " copy" is stripped regardless of letter case or amount of leading whitespace.
    #[test]
    fn test_clean_suffix_copy() {
        assert_eq!(StringInterner::clean_suffix("GameObject copy"), "GameObject");
        assert_eq!(StringInterner::clean_suffix("GameObject Copy"), "GameObject");
        assert_eq!(StringInterner::clean_suffix("GameObject COPY"), "GameObject");
        // Two spaces before the suffix: all of the whitespace must go as well.
        assert_eq!(
            StringInterner::clean_suffix("GameObject  copy"),
            "GameObject"
        );
    }

    /// " (N)" is stripped for any number of digits.
    #[test]
    fn test_clean_suffix_numbered() {
        assert_eq!(StringInterner::clean_suffix("GameObject (1)"), "GameObject");
        assert_eq!(StringInterner::clean_suffix("GameObject (2)"), "GameObject");
        assert_eq!(
            StringInterner::clean_suffix("GameObject (123)"),
            "GameObject"
        );
        // Two spaces before the suffix: all of the whitespace must go as well.
        assert_eq!(
            StringInterner::clean_suffix("GameObject  (1)"),
            "GameObject"
        );
    }

    /// Stacked suffixes are removed one after another, in any order.
    #[test]
    fn test_clean_suffix_multiple() {
        assert_eq!(
            StringInterner::clean_suffix("GameObject (1) (2)"),
            "GameObject"
        );
        assert_eq!(
            StringInterner::clean_suffix("GameObject copy (1)"),
            "GameObject"
        );
        assert_eq!(
            StringInterner::clean_suffix("GameObject (1) copy"),
            "GameObject"
        );
    }

    /// Inputs that merely look similar to a suffix are left untouched.
    #[test]
    fn test_clean_suffix_no_match() {
        assert_eq!(StringInterner::clean_suffix("GameObject"), "GameObject");
        // No preceding whitespace, so these are names, not suffixes.
        assert_eq!(StringInterner::clean_suffix("copy"), "copy");
        assert_eq!(StringInterner::clean_suffix("(1)"), "(1)");
        assert_eq!(
            StringInterner::clean_suffix("GameObject(1)"),
            "GameObject(1)"
        ); // No space
        assert_eq!(
            StringInterner::clean_suffix("GameObject copy suffix"),
            "GameObject copy suffix"
        ); // Not at end
    }

    /// Interning the same string twice yields the same allocation.
    #[test]
    fn test_intern_deduplication() {
        let s1 = intern_string("TestString");
        let s2 = intern_string("TestString");

        // Same Arc pointer (deduplicated)
        assert!(Arc::ptr_eq(&s1, &s2));
        assert_eq!(s1.as_ref(), "TestString");
    }

    /// Names differing only by a duplicate suffix share one allocation.
    #[test]
    fn test_intern_with_suffix_cleaning() {
        let s1 = intern_string("GameObject");
        let s2 = intern_string("GameObject (1)");
        let s3 = intern_string("GameObject copy");

        // All should point to the same interned string
        assert!(Arc::ptr_eq(&s1, &s2));
        assert!(Arc::ptr_eq(&s1, &s3));
        assert_eq!(s1.as_ref(), "GameObject");
    }

    /// Distinct names stay distinct.
    #[test]
    fn test_intern_different_strings() {
        let s1 = intern_string("GameObject1");
        let s2 = intern_string("GameObject2");

        // Different pointers (different strings)
        assert!(!Arc::ptr_eq(&s1, &s2));
        assert_eq!(s1.as_ref(), "GameObject1");
        assert_eq!(s2.as_ref(), "GameObject2");
    }

    /// The empty string can be interned and round-trips unchanged.
    #[test]
    fn test_empty_string() {
        let s = intern_string("");
        assert_eq!(s.as_ref(), "");
    }

    /// A long chain of mixed suffixes collapses to the base name.
    #[test]
    fn test_multiple_suffixes_complex() {
        let s = intern_string("Player (1) (2) copy (3)");
        assert_eq!(s.as_ref(), "Player");
    }

    /// The amount of whitespace in front of a suffix does not matter.
    #[test]
    fn test_whitespace_variations() {
        let s1 = intern_string("Object  copy"); // Double space
        let s2 = intern_string("Object copy"); // Single space
        let s3 = intern_string("Object   (1)"); // Triple space

        // All should be deduplicated to "Object"
        assert!(Arc::ptr_eq(&s1, &s2));
        assert!(Arc::ptr_eq(&s1, &s3));
        assert_eq!(s1.as_ref(), "Object");
    }
}
|
||||
@@ -2,13 +2,14 @@
|
||||
|
||||
use crate::types::{yaml_helpers, ComponentContext, UnityComponent};
|
||||
use sparsey::Entity;
|
||||
use std::sync::Arc;
|
||||
|
||||
/// A GameObject component
|
||||
///
|
||||
/// GameObjects are the fundamental objects in Unity that represent entities in a scene.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct GameObject {
|
||||
pub name: String,
|
||||
pub name: Arc<str>,
|
||||
pub is_active: bool,
|
||||
pub layer: i64,
|
||||
pub tag: i64,
|
||||
@@ -19,7 +20,8 @@ impl UnityComponent for GameObject {
|
||||
///
|
||||
/// Note: Caller is responsible for ensuring this is called on the correct document type.
|
||||
fn parse(yaml: &serde_yaml::Mapping, _ctx: &ComponentContext) -> Option<Self> {
|
||||
let name = yaml_helpers::get_string(yaml, "m_Name").unwrap_or_default();
|
||||
let name = yaml_helpers::get_interned_string(yaml, "m_Name")
|
||||
.unwrap_or_else(|| Arc::from(""));
|
||||
|
||||
let is_active = yaml_helpers::get_bool(yaml, "m_IsActive").unwrap_or(true);
|
||||
|
||||
|
||||
@@ -811,7 +811,7 @@ mod tests {
|
||||
documents: Vec::new(),
|
||||
file_id_map: HashMap::new(),
|
||||
overrides: HashMap::new(),
|
||||
next_file_id: i64::MAX,
|
||||
next_file_id: Arc::new(Cell::new(i64::MAX)),
|
||||
source_path: PathBuf::from("test.prefab"),
|
||||
};
|
||||
|
||||
@@ -829,7 +829,7 @@ mod tests {
|
||||
documents: Vec::new(),
|
||||
file_id_map: HashMap::new(),
|
||||
overrides: HashMap::new(),
|
||||
next_file_id: i64::MAX,
|
||||
next_file_id: Arc::new(Cell::new(i64::MAX)),
|
||||
source_path: PathBuf::from("test.prefab"),
|
||||
};
|
||||
|
||||
|
||||
Reference in New Issue
Block a user