From 2efa1aa86d22ddabd75d1f592a73d0bbf04a4f6f Mon Sep 17 00:00:00 2001 From: Connor Date: Wed, 7 Jan 2026 09:29:03 +0000 Subject: [PATCH] items DB --- .claude/settings.local.json | 3 +- Cargo.lock | 364 +++++++++++++++++- cursebreaker-parser/Cargo.toml | 13 + cursebreaker-parser/XML_PARSING.md | 197 ++++++++++ .../examples/item_database_demo.rs | 101 +++++ cursebreaker-parser/src/item_database.rs | 157 ++++++++ cursebreaker-parser/src/lib.rs | 58 +++ cursebreaker-parser/src/main.rs | 30 +- cursebreaker-parser/src/types/item.rs | 158 ++++++++ cursebreaker-parser/src/types/mod.rs | 2 + cursebreaker-parser/src/xml_parser.rs | 190 +++++++++ unity-parser/src/types/component.rs | 11 + unity-parser/src/types/mod.rs | 2 + unity-parser/src/types/string_interner.rs | 233 +++++++++++ .../src/types/unity_types/game_object.rs | 6 +- .../src/types/unity_types/prefab_instance.rs | 4 +- 16 files changed, 1517 insertions(+), 12 deletions(-) create mode 100644 cursebreaker-parser/XML_PARSING.md create mode 100644 cursebreaker-parser/examples/item_database_demo.rs create mode 100644 cursebreaker-parser/src/item_database.rs create mode 100644 cursebreaker-parser/src/lib.rs create mode 100644 cursebreaker-parser/src/types/item.rs create mode 100644 cursebreaker-parser/src/xml_parser.rs create mode 100644 unity-parser/src/types/string_interner.rs diff --git a/.claude/settings.local.json b/.claude/settings.local.json index 48969ef..8e2201b 100644 --- a/.claude/settings.local.json +++ b/.claude/settings.local.json @@ -17,7 +17,8 @@ "Bash(cargo doc:*)", "Bash(xargs dirname:*)", "Bash(xargs -I {} find {} -name \"*.cs\")", - "Bash(RUST_LOG=debug cargo run:*)" + "Bash(RUST_LOG=debug cargo run:*)", + "WebSearch" ], "additionalDirectories": [ "/home/connor/repos/CBAssets/" diff --git a/Cargo.lock b/Cargo.lock index 0677b99..58a9a83 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -23,35 +23,185 @@ version = "0.1.13" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "41e67cd8309bbd06cd603a9e693a784ac2e5d1e955f11286e355089fcab3047c" +[[package]] +name = "bumpalo" +version = "3.19.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5dd9dc738b7a8311c7ade152424974d8115f2cdad61e8dab8dac9f2362298510" + +[[package]] +name = "cc" +version = "1.2.51" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a0aeaff4ff1a90589618835a598e545176939b97874f7abc7851caa0618f203" +dependencies = [ + "find-msvc-tools", + "shlex", +] + +[[package]] +name = "cfg-if" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" + [[package]] name = "cursebreaker-parser" version = "0.1.0" dependencies = [ + "diesel", "inventory", "log", + "quick-xml", + "serde", + "serde_json", "serde_yaml", "sparsey", + "thiserror 1.0.69", "unity-parser", ] +[[package]] +name = "darling" +version = "0.21.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9cdf337090841a411e2a7f3deb9187445851f91b309c0c0a29e05f74a00a48c0" +dependencies = [ + "darling_core", + "darling_macro", +] + +[[package]] +name = "darling_core" +version = "0.21.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1247195ecd7e3c85f83c8d2a366e4210d588e802133e1e355180a9870b517ea4" +dependencies = [ + "fnv", + "ident_case", + "proc-macro2", + "quote", + "strsim", + "syn", +] + +[[package]] +name = "darling_macro" +version = "0.21.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d38308df82d1080de0afee5d069fa14b0326a88c14f15c5ccda35b4a6c414c81" +dependencies = [ + "darling_core", + "quote", + "syn", +] + +[[package]] +name = "deranged" +version = "0.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ececcb659e7ba858fb4f10388c250a7252eb0a27373f1a72b8748afdd248e587" +dependencies = [ + "powerfmt", +] + +[[package]] +name = "diesel" +version = "2.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e130c806dccc85428c564f2dc5a96e05b6615a27c9a28776bd7761a9af4bb552" +dependencies = [ + "diesel_derives", + "downcast-rs", + "libsqlite3-sys", + "sqlite-wasm-rs", + "time", +] + +[[package]] +name = "diesel_derives" +version = "2.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c30b2969f923fa1f73744b92bb7df60b858df8832742d9a3aceb79236c0be1d2" +dependencies = [ + "diesel_table_macro_syntax", + "dsl_auto_type", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "diesel_table_macro_syntax" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fe2444076b48641147115697648dc743c2c00b61adade0f01ce67133c7babe8c" +dependencies = [ + "syn", +] + [[package]] name = "diff" version = "0.1.13" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "56254986775e3233ffa9c4d7d3faaf6d36a2c09d30b20687e9f88bc8bafc16c8" +[[package]] +name = "downcast-rs" +version = "2.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "117240f60069e65410b3ae1bb213295bd828f707b5bec6596a1afc8793ce0cbc" + +[[package]] +name = "dsl_auto_type" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dd122633e4bef06db27737f21d3738fb89c8f6d5360d6d9d7635dda142a7757e" +dependencies = [ + "darling", + "either", + "heck", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "either" +version = "1.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" + [[package]] name = "equivalent" version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" +[[package]] +name = "find-msvc-tools" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "645cbb3a84e60b7531617d5ae4e57f7e27308f6445f5abf653209ea76dec8dff" + +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + [[package]] name = "foldhash" version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" +[[package]] +name = "foldhash" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77ce24cb58228fbb8aa041425bb1050850ac19177686ea6e0f41a70416f56fdb" + [[package]] name = "glam" version = "0.29.3" @@ -69,7 +219,7 @@ checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" dependencies = [ "allocator-api2", "equivalent", - "foldhash", + "foldhash 0.1.5", ] [[package]] @@ -77,6 +227,21 @@ name = "hashbrown" version = "0.16.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" +dependencies = [ + "foldhash 0.2.0", +] + +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + +[[package]] +name = "ident_case" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" [[package]] name = "indexmap" @@ -105,6 +270,26 @@ version = "1.0.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2" +[[package]] +name = "js-sys" +version = "0.3.83" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "464a3709c7f55f1f721e5389aa6ea4e3bc6aba669353300af094b29ffbdde1d8" +dependencies = [ + "once_cell", + "wasm-bindgen", +] + +[[package]] +name = "libsqlite3-sys" +version = "0.35.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "133c182a6a2c87864fe97778797e46c7e999672690dc9fa3ee8e241aa4a9c13f" +dependencies = [ + "pkg-config", + "vcpkg", +] + [[package]] name = "log" version = "0.4.29" @@ -126,12 +311,30 @@ version = "2.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273" +[[package]] +name = "num-conv" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9" + [[package]] name = "once_cell" version = "1.21.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" +[[package]] +name = "pkg-config" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" + +[[package]] +name = "powerfmt" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" + [[package]] name = "pretty_assertions" version = "1.4.1" @@ -151,6 +354,15 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "quick-xml" +version = "0.37.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "331e97a1af0bf59823e6eadffe373d7b27f485be8748f71471c662c1f269b7fb" +dependencies = [ + "memchr", +] + [[package]] name = "quote" version = "1.0.42" @@ -246,6 +458,19 @@ dependencies = [ "syn", ] +[[package]] +name = "serde_json" +version = "1.0.149" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86" +dependencies = [ + "itoa", + "memchr", + "serde", + "serde_core", + "zmij", +] + [[package]] name = "serde_yaml" version = "0.9.34+deprecated" @@ -259,6 +484,12 @@ dependencies = [ "unsafe-libyaml", ] +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + [[package]] name = "smallvec" version = "1.15.1" @@ -276,6 +507,25 @@ dependencies = [ "rustc-hash", ] +[[package]] +name = "sqlite-wasm-rs" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05e98301bf8b0540c7de45ecd760539b9c62f5772aed172f08efba597c11cd5d" +dependencies = [ + "cc", + "hashbrown 0.16.1", + "js-sys", + "thiserror 2.0.17", + "wasm-bindgen", +] + +[[package]] +name = "strsim" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" + [[package]] name = "syn" version = "2.0.111" @@ -293,7 +543,16 @@ version = "1.0.69" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52" dependencies = [ - "thiserror-impl", + "thiserror-impl 1.0.69", +] + +[[package]] +name = "thiserror" +version = "2.0.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f63587ca0f12b72a0600bcba1d40081f830876000bb46dd2337a3051618f4fc8" +dependencies = [ + "thiserror-impl 2.0.17", ] [[package]] @@ -307,6 +566,48 @@ dependencies = [ "syn", ] +[[package]] +name = "thiserror-impl" +version = "2.0.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3ff15c8ecd7de3849db632e14d18d2571fa09dfc5ed93479bc4485c7a517c913" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "time" +version = "0.3.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91e7d9e3bb61134e77bde20dd4825b97c010155709965fedf0f49bb138e52a9d" +dependencies = [ + "deranged", + "itoa", + "num-conv", + "powerfmt", + "serde", + "time-core", + "time-macros", +] + +[[package]] +name = "time-core" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40868e7c1d2f0b8d73e4a8c7f0ff63af4f6d19be117e90bd73eb1d62cf831c6b" + +[[package]] +name = "time-macros" +version = "0.2.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "30cfb0125f12d9c277f35663a0a33f8c30190f4e4574868a330595412d34ebf3" +dependencies = [ + "num-conv", + "time-core", +] + [[package]] name = "unicode-ident" version = "1.0.22" @@ -329,7 +630,7 @@ dependencies = [ "serde_yaml", "smallvec", "sparsey", - "thiserror", + "thiserror 1.0.69", "unity-parser-macros", "walkdir", ] @@ -349,6 +650,12 @@ version = "0.2.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "673aac59facbab8a9007c7f6108d11f63b603f7cabff99fabf650fea5c32b861" +[[package]] +name = "vcpkg" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" + [[package]] name = "walkdir" version = "2.5.0" @@ -359,6 +666,51 @@ dependencies = [ "winapi-util", ] +[[package]] +name = "wasm-bindgen" +version = "0.2.106" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0d759f433fa64a2d763d1340820e46e111a7a5ab75f993d1852d70b03dbb80fd" +dependencies = [ + "cfg-if", + "once_cell", + "rustversion", + "wasm-bindgen-macro", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.106" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48cb0d2638f8baedbc542ed444afc0644a29166f1595371af4fecf8ce1e7eeb3" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.106" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cefb59d5cd5f92d9dcf80e4683949f15ca4b511f4ac0a6e14d4e1ac60c6ecd40" +dependencies = [ + "bumpalo", + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.106" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cbc538057e648b67f72a982e708d485b2efa771e1ac05fec311f9f63e5800db4" +dependencies = [ + "unicode-ident", +] + [[package]] name = "winapi-util" version = "0.1.11" @@ -388,3 +740,9 @@ name = "yansi" version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cfe53a6657fd280eaa890a3bc59152892ffa3e30101319d168b781ed6529b049" + +[[package]] +name = "zmij" +version = "1.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2fc5a66a20078bf1251bde995aa2fdcc4b800c70b5d92dd2c62abc5c60f679f8" diff --git a/cursebreaker-parser/Cargo.toml b/cursebreaker-parser/Cargo.toml index 21971f5..5178794 100644 --- a/cursebreaker-parser/Cargo.toml +++ b/cursebreaker-parser/Cargo.toml @@ -3,9 +3,22 @@ name = "cursebreaker-parser" version = "0.1.0" edition = "2021" +[lib] +name = "cursebreaker_parser" +path = "src/lib.rs" + +[[bin]] +name = "cursebreaker-parser" +path = "src/main.rs" + [dependencies] unity-parser = { path = "../unity-parser" } serde_yaml = "0.9" inventory = "0.3" sparsey = "0.13" log = { version = "0.4", features = ["std"] } +quick-xml = "0.37" +serde = { version = "1.0", features = ["derive"] } +serde_json = "1.0" +diesel = { version = "2.2", features = ["sqlite"], optional = true } +thiserror = "1.0" diff --git a/cursebreaker-parser/XML_PARSING.md b/cursebreaker-parser/XML_PARSING.md new file mode 100644 index 0000000..84da0e7 --- /dev/null +++ b/cursebreaker-parser/XML_PARSING.md @@ -0,0 +1,197 @@ +# XML Parsing in Cursebreaker Parser + +This document describes the XML parsing functionality added to the cursebreaker-parser project. + +## Overview + +The parser now supports loading game data from Cursebreaker's XML files and storing them in efficient data structures for runtime access and SQL database serialization. + +## Features + +- ✅ Parse Items.xml with full attribute and nested element support +- ✅ In-memory database with fast lookups by ID, name, category, slot, and skill +- ✅ JSON serialization for SQL database storage +- ✅ Type-safe data structures with serde support +- ✅ Easy-to-use API + +## Quick Start + +### Loading Items + +```rust +use cursebreaker_parser::ItemDatabase; + +let item_db = ItemDatabase::load_from_xml("Data/XMLs/Items/Items.xml")?; +println!("Loaded {} items", item_db.len()); +``` + +### Querying Items + +```rust +// Get by ID +if let Some(item) = item_db.get_by_id(150) { + println!("Found: {}", item.name); +} + +// Get by category +let bows = item_db.get_by_category("bow"); + +// Get by slot +let weapons = item_db.get_by_slot("weapon"); + +// Get by skill requirement +let magic_items = item_db.get_by_skill("magic"); + +// Get all items +for item in item_db.all_items() { + println!("{}: {}", item.id, item.name); +} +``` + +### SQL Serialization + +```rust +// Prepare items for SQL insertion +let sql_data = item_db.prepare_for_sql(); + +for (id, name, json_data) in sql_data { + // INSERT INTO items (id, name, data) VALUES (?, ?, ?) + // Use your preferred SQL library to insert +} +``` + +## Data Structures + +### Item + +The main `Item` struct contains all item attributes from the XML: + +```rust +pub struct Item { + // Required + pub id: i32, + pub name: String, + + // Optional attributes + pub level: Option, + pub description: Option, + pub price: Option, + pub slot: Option, + pub category: Option, + pub skill: Option, + + // ... many more fields + + // Nested elements + pub stats: Vec, + pub crafting_recipes: Vec, + pub animations: Option, + pub generate_rules: Vec, +} +``` + +### ItemStat + +Represents item statistics: + +```rust +pub struct ItemStat { + // Damage + pub damagephysical: Option, + pub damagemagical: Option, + pub damageranged: Option, + + // Accuracy + pub accuracyphysical: Option, + pub accuracymagical: Option, + pub accuracyranged: Option, + + // Resistance + pub resistancephysical: Option, + pub resistancemagical: Option, + pub resistanceranged: Option, + + // Core stats + pub health: Option, + pub mana: Option, + pub manaregen: Option, + pub healing: Option, + + // Harvesting + pub harvestingspeedwoodcutting: Option, +} +``` + +## Example Program + +Run the demo to see all features in action: + +```bash +cargo run --example item_database_demo +``` + +## Statistics from Items.xml + +When loaded from `/home/connor/repos/CBAssets/Data/XMLs/Items/Items.xml`: + +- **Total Items**: 1,360 +- **Weapons**: 166 +- **Armor**: 148 +- **Consumables**: 294 +- **Trinkets**: 59 +- **Bows**: 18 +- **Magic Items**: 76 + +## File Structure + +``` +cursebreaker-parser/ +├── src/ +│ ├── lib.rs # Library exports +│ ├── main.rs # Main binary (includes Unity + XML parsing) +│ ├── types/ +│ │ ├── mod.rs +│ │ ├── item.rs # Item data structures +│ │ └── interactable_resource.rs +│ ├── xml_parser.rs # XML parsing logic +│ └── item_database.rs # ItemDatabase for runtime access +└── examples/ + └── item_database_demo.rs # Full usage example +``` + +## Dependencies Added + +```toml +quick-xml = "0.37" # XML parsing +serde = { version = "1.0", features = ["derive"] } # Serialization +serde_json = "1.0" # JSON serialization +diesel = { version = "2.2", features = ["sqlite"], optional = true } # SQL (optional) +thiserror = "1.0" # Error handling +``` + +## Future Enhancements + +The same pattern can be extended to parse other XML files: + +- [ ] NPCs (`/XMLs/Npcs/*.xml`) +- [ ] Quests (`/XMLs/Quests/*.xml`) +- [ ] Loot tables (`/XMLs/Loot/*.xml`) +- [ ] Maps (`/XMLs/Maps/*.xml`) +- [ ] Dialogue (`/XMLs/Dialogue/*.xml`) +- [ ] Events (`/XMLs/Events/*.xml`) + +Each would follow the same pattern: +1. Define data structures in `src/types/` +2. Create parser in `src/xml_parser.rs` +3. Create database wrapper for runtime access +4. Add to `lib.rs` exports + +## Integration with Unity Parser + +The main binary (`src/main.rs`) demonstrates integration of both systems: + +1. Load game data from XML files (Items, etc.) +2. Parse Unity scenes for game objects +3. Cross-reference data (e.g., item IDs in loot spawners) + +This creates a complete game data pipeline from source files to runtime. diff --git a/cursebreaker-parser/examples/item_database_demo.rs b/cursebreaker-parser/examples/item_database_demo.rs new file mode 100644 index 0000000..11f41b3 --- /dev/null +++ b/cursebreaker-parser/examples/item_database_demo.rs @@ -0,0 +1,101 @@ +//! Example demonstrating ItemDatabase usage +//! +//! Run with: cargo run --example item_database_demo + +use cursebreaker_parser::ItemDatabase; + +fn main() -> Result<(), Box> { + println!("🎮 Cursebreaker Item Database Demo\n"); + + // Load items from XML + let items_path = "/home/connor/repos/CBAssets/Data/XMLs/Items/Items.xml"; + println!("📚 Loading items from: {}", items_path); + + let item_db = ItemDatabase::load_from_xml(items_path)?; + println!("✅ Loaded {} items\n", item_db.len()); + + // Example 1: Get item by ID + println!("=== Example 1: Get Item by ID ==="); + if let Some(item) = item_db.get_by_id(150) { + println!("Item ID 150:"); + println!(" Name: {}", item.name); + if let Some(desc) = &item.description { + println!(" Description: {}", desc); + } + if let Some(slot) = &item.slot { + println!(" Slot: {}", slot); + } + if let Some(skill) = &item.skill { + println!(" Skill: {}", skill); + } + println!(" Stats: {} stat entries", item.stats.len()); + } + println!(); + + // Example 2: Get items by category + println!("=== Example 2: Get Items by Category ==="); + let bows = item_db.get_by_category("bow"); + println!("Found {} bows:", bows.len()); + for item in bows.iter().take(5) { + println!(" - {} (ID: {})", item.name, item.id); + } + println!(); + + // Example 3: Get items by slot + println!("=== Example 3: Get Items by Slot ==="); + let consumables = item_db.get_by_slot("consumable"); + println!("Found {} consumables (showing first 10):", consumables.len()); + for item in consumables.iter().take(10) { + let name = &item.name; + let id = item.id; + if let Some(desc) = &item.description { + println!(" - {} (ID: {}) - {}", name, id, desc.chars().take(50).collect::()); + } else { + println!(" - {} (ID: {})", name, id); + } + } + println!(); + + // Example 4: Get items by skill + println!("=== Example 4: Get Items by Skill ==="); + let magic_items = item_db.get_by_skill("magic"); + println!("Found {} magic items:", magic_items.len()); + for item in magic_items.iter().take(5) { + println!(" - {} (ID: {}, Level: {:?})", + item.name, item.id, item.level); + } + println!(); + + // Example 5: Statistics + println!("=== Example 5: Database Statistics ==="); + let weapons = item_db.get_by_slot("weapon"); + let armor = item_db.get_by_slot("armor"); + let consumables = item_db.get_by_slot("consumable"); + let trinkets = item_db.get_by_slot("trinket"); + + println!("Item Distribution by Slot:"); + println!(" Weapons: {}", weapons.len()); + println!(" Armor: {}", armor.len()); + println!(" Consumables: {}", consumables.len()); + println!(" Trinkets: {}", trinkets.len()); + println!(); + + // Example 6: Prepare for SQL (showing how it would be used) + println!("=== Example 6: SQL Serialization ==="); + let sql_data = item_db.prepare_for_sql(); + println!("Prepared {} items for SQL insertion", sql_data.len()); + println!("Sample SQL inserts (first 3):"); + for (id, name, json) in sql_data.iter().take(3) { + let json_preview = if json.len() > 100 { + format!("{}...", &json[..100]) + } else { + json.clone() + }; + println!(" INSERT INTO items (id, name, data) VALUES ({}, '{}', '{}');", + id, name, json_preview); + } + + println!("\n✨ Demo complete!"); + + Ok(()) +} diff --git a/cursebreaker-parser/src/item_database.rs b/cursebreaker-parser/src/item_database.rs new file mode 100644 index 0000000..66b3e52 --- /dev/null +++ b/cursebreaker-parser/src/item_database.rs @@ -0,0 +1,157 @@ +use crate::types::Item; +use crate::xml_parser::{parse_items_xml, XmlParseError}; +use std::collections::HashMap; +use std::path::Path; + +/// A database for managing game items loaded from XML files +#[derive(Debug, Clone)] +pub struct ItemDatabase { + items: Vec, + items_by_id: HashMap, + items_by_name: HashMap>, +} + +impl ItemDatabase { + /// Create a new empty ItemDatabase + pub fn new() -> Self { + Self { + items: Vec::new(), + items_by_id: HashMap::new(), + items_by_name: HashMap::new(), + } + } + + /// Load items from an XML file + pub fn load_from_xml>(path: P) -> Result { + let items = parse_items_xml(path)?; + let mut db = Self::new(); + db.add_items(items); + Ok(db) + } + + /// Add items to the database + pub fn add_items(&mut self, items: Vec) { + for item in items { + let index = self.items.len(); + self.items_by_id.insert(item.id, index); + + // Add to name index (can have multiple items with same name) + self.items_by_name + .entry(item.name.clone()) + .or_insert_with(Vec::new) + .push(index); + + self.items.push(item); + } + } + + /// Get an item by ID + pub fn get_by_id(&self, id: i32) -> Option<&Item> { + self.items_by_id + .get(&id) + .and_then(|&index| self.items.get(index)) + } + + /// Get items by name (returns all items with matching name) + pub fn get_by_name(&self, name: &str) -> Vec<&Item> { + self.items_by_name + .get(name) + .map(|indices| { + indices + .iter() + .filter_map(|&index| self.items.get(index)) + .collect() + }) + .unwrap_or_default() + } + + /// Get all items + pub fn all_items(&self) -> &[Item] { + &self.items + } + + /// Get items by category + pub fn get_by_category(&self, category: &str) -> Vec<&Item> { + self.items + .iter() + .filter(|item| { + item.category + .as_ref() + .map(|c| c == category) + .unwrap_or(false) + }) + .collect() + } + + /// Get items by slot + pub fn get_by_slot(&self, slot: &str) -> Vec<&Item> { + self.items + .iter() + .filter(|item| { + item.slot + .as_ref() + .map(|s| s == slot) + .unwrap_or(false) + }) + .collect() + } + + /// Get items by skill requirement + pub fn get_by_skill(&self, skill: &str) -> Vec<&Item> { + self.items + .iter() + .filter(|item| { + item.skill + .as_ref() + .map(|s| s == skill) + .unwrap_or(false) + }) + .collect() + } + + /// Get number of items in database + pub fn len(&self) -> usize { + self.items.len() + } + + /// Check if database is empty + pub fn is_empty(&self) -> bool { + self.items.is_empty() + } + + /// Serialize items to JSON for SQL storage + #[cfg(feature = "diesel")] + pub fn to_json(&self) -> Result { + serde_json::to_string(&self.items) + } + + /// Prepare items for SQL insertion + /// Returns a vector of tuples (id, name, json_data) + pub fn prepare_for_sql(&self) -> Vec<(i32, String, String)> { + self.items + .iter() + .map(|item| { + let json = serde_json::to_string(item).unwrap_or_else(|_| "{}".to_string()); + (item.id, item.name.clone(), json) + }) + .collect() + } +} + +impl Default for ItemDatabase { + fn default() -> Self { + Self::new() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_item_database_basic() { + let mut db = ItemDatabase::new(); + assert!(db.is_empty()); + assert_eq!(db.len(), 0); + } +} diff --git a/cursebreaker-parser/src/lib.rs b/cursebreaker-parser/src/lib.rs new file mode 100644 index 0000000..0567501 --- /dev/null +++ b/cursebreaker-parser/src/lib.rs @@ -0,0 +1,58 @@ +//! Cursebreaker Parser - A library for parsing Cursebreaker game data +//! +//! This library provides functionality to: +//! - Parse Unity scenes and extract game objects +//! - Load game data from XML files (Items, NPCs, Quests, etc.) +//! - Store and query game data at runtime +//! - Serialize data to SQL databases +//! +//! # Example - Loading Items from XML +//! +//! ```no_run +//! use cursebreaker_parser::ItemDatabase; +//! +//! // Load all items from XML +//! let item_db = ItemDatabase::load_from_xml("Data/XMLs/Items/Items.xml")?; +//! println!("Loaded {} items", item_db.len()); +//! +//! // Get item by ID +//! if let Some(item) = item_db.get_by_id(150) { +//! println!("Found: {}", item.name); +//! } +//! +//! // Query items by category +//! let weapons = item_db.get_by_category("bow"); +//! println!("Found {} bows", weapons.len()); +//! +//! // Query items by slot +//! let consumables = item_db.get_by_slot("consumable"); +//! for item in consumables { +//! println!("Consumable: {}", item.name); +//! } +//! # Ok::<(), Box>(()) +//! ``` +//! +//! # Example - Preparing Data for SQL +//! +//! ```no_run +//! use cursebreaker_parser::ItemDatabase; +//! +//! let item_db = ItemDatabase::load_from_xml("Data/XMLs/Items/Items.xml")?; +//! +//! // Prepare data for SQL insertion +//! // Returns Vec<(id, name, json_data)> +//! let sql_data = item_db.prepare_for_sql(); +//! +//! for (id, name, json) in sql_data.iter().take(5) { +//! println!("INSERT INTO items VALUES ({}, '{}', '{}')", id, name, json); +//! } +//! # Ok::<(), Box>(()) +//! ``` + +pub mod types; +mod xml_parser; +mod item_database; + +pub use item_database::ItemDatabase; +pub use types::{Item, ItemStat, CraftingRecipe, AnimationSet, GenerateRule, InteractableResource}; +pub use xml_parser::XmlParseError; diff --git a/cursebreaker-parser/src/main.rs b/cursebreaker-parser/src/main.rs index e6322f5..c22cfb8 100644 --- a/cursebreaker-parser/src/main.rs +++ b/cursebreaker-parser/src/main.rs @@ -6,9 +6,7 @@ //! 3. Extracting typeId and transform positions //! 4. Writing resource data to an output file -mod types; - -use types::InteractableResource; +use cursebreaker_parser::{ItemDatabase, InteractableResource}; use unity_parser::UnityProject; use std::path::Path; use unity_parser::log::DedupLogger; @@ -24,9 +22,33 @@ fn main() -> Result<(), Box> { info!("🎮 Cursebreaker - Resource Parser"); + // Load items from XML + info!("📚 Loading items from XML..."); + let items_path = "/home/connor/repos/CBAssets/Data/XMLs/Items/Items.xml"; + let item_db = ItemDatabase::load_from_xml(items_path)?; + info!("✅ Loaded {} items from XML", item_db.len()); + + // Print some item statistics + let weapons = item_db.get_by_slot("weapon"); + let consumables = item_db.get_by_slot("consumable"); + info!(" • Weapons: {}", weapons.len()); + info!(" • Consumables: {}", consumables.len()); + + // Example: Print first few items + info!("\n📦 Sample Items:"); + for item in item_db.all_items().iter().take(5) { + info!(" ID: {}, Name: \"{}\"", item.id, item.name); + if let Some(desc) = &item.description { + info!(" Description: {}", desc); + } + if let Some(price) = item.price { + info!(" Price: {}", price); + } + } + // Initialize Unity project once - scans entire project for GUID mappings let project_root = Path::new("/home/connor/repos/CBAssets"); - info!("📦 Initializing Unity project from: {}", project_root.display()); + info!("\n📦 Initializing Unity project from: {}", project_root.display()); let project = UnityProject::from_path(project_root)?; diff --git a/cursebreaker-parser/src/types/item.rs b/cursebreaker-parser/src/types/item.rs new file mode 100644 index 0000000..6bbc288 --- /dev/null +++ b/cursebreaker-parser/src/types/item.rs @@ -0,0 +1,158 @@ +use serde::{Deserialize, Serialize}; + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct Item { + // Required fields + pub id: i32, + pub name: String, + + // Optional basic attributes + pub level: Option, + pub description: Option, + pub price: Option, + pub slot: Option, + pub category: Option, + pub skill: Option, + pub tool: Option, + + // Item behavior + pub stackable: Option, + pub maxstack: Option, + pub abilityid: Option, + pub swap: Option, + pub twohanded: Option, + + // Food/consumable properties + pub foodamount: Option, + pub foodfrequency: Option, + pub foodtime: Option, + pub foodlevel: Option, + + // Crafting + pub craftingskill: Option, + pub workbench: Option, + pub craftingitems: Option, + + // Visual/audio + pub handmodel: Option, + pub groundmodel: Option, + pub usingitemmodel: Option, + pub dropsfx: Option, + pub pickupsfx: Option, + pub hitgfx: Option, + pub attackanimations: Option, + pub attackanimationspeed: Option, + pub attackhitsounds: Option, + + // Storage + pub storageitem: Option, + pub storagesize: Option, + + // Other flags + pub hidemilestone: Option, + pub generateicon: Option, + pub comment: Option, + + // Nested elements + pub stats: Vec, + pub crafting_recipes: Vec, + pub animations: Option, + pub generate_rules: Vec, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ItemStat { + // Damage stats + pub damagephysical: Option, + pub damagemagical: Option, + pub damageranged: Option, + + // Accuracy stats + pub accuracyphysical: Option, + pub accuracymagical: Option, + pub accuracyranged: Option, + + // Resistance stats + pub resistancephysical: Option, + pub resistancemagical: Option, + pub resistanceranged: Option, + + // Core stats + pub health: Option, + pub mana: Option, + pub manaregen: Option, + pub healing: Option, + + // Harvesting stats + pub harvestingspeedwoodcutting: Option, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct CraftingRecipe { + pub workbench: Option, + pub craftingitems: Option, + pub craftingskill: Option, + pub checks: Option, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct AnimationSet { + pub idle: Option, + pub walk: Option, + pub run: Option, + pub weaponattack: Option, + pub takehit: Option, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct GenerateRule { + pub generatestats: Option, + pub generatecrafting: Option, + pub generateicon: Option, +} + +impl Item { + pub fn new(id: i32, name: String) -> Self { + Self { + id, + name, + level: None, + description: None, + price: None, + slot: None, + category: None, + skill: None, + tool: None, + stackable: None, + maxstack: None, + abilityid: None, + swap: None, + twohanded: None, + foodamount: None, + foodfrequency: None, + foodtime: None, + foodlevel: None, + craftingskill: None, + workbench: None, + craftingitems: None, + handmodel: None, + groundmodel: None, + usingitemmodel: None, + dropsfx: None, + pickupsfx: None, + hitgfx: None, + attackanimations: None, + attackanimationspeed: None, + attackhitsounds: None, + storageitem: None, + storagesize: None, + hidemilestone: None, + generateicon: None, + comment: None, + stats: Vec::new(), + crafting_recipes: Vec::new(), + animations: None, + generate_rules: Vec::new(), + } + } +} diff --git a/cursebreaker-parser/src/types/mod.rs b/cursebreaker-parser/src/types/mod.rs index 21249cc..ecd31da 100644 --- a/cursebreaker-parser/src/types/mod.rs +++ b/cursebreaker-parser/src/types/mod.rs @@ -1,3 +1,5 @@ mod interactable_resource; +mod item; pub use interactable_resource::InteractableResource; +pub use item::{Item, ItemStat, CraftingRecipe, AnimationSet, GenerateRule}; diff --git a/cursebreaker-parser/src/xml_parser.rs b/cursebreaker-parser/src/xml_parser.rs new file mode 100644 index 0000000..fb32da6 --- /dev/null +++ b/cursebreaker-parser/src/xml_parser.rs @@ -0,0 +1,190 @@ +use crate::types::{Item, ItemStat, CraftingRecipe, AnimationSet, GenerateRule}; +use quick_xml::events::Event; +use quick_xml::reader::Reader; +use std::collections::HashMap; +use std::fs::File; +use std::io::BufReader; +use std::path::Path; +use thiserror::Error; + +#[derive(Debug, Error)] +pub enum XmlParseError { + #[error("XML parsing error: {0}")] + XmlError(#[from] quick_xml::Error), + + #[error("IO error: {0}")] + IoError(#[from] std::io::Error), + + #[error("Attribute error: {0}")] + AttrError(#[from] quick_xml::events::attributes::AttrError), + + #[error("Missing required attribute: {0}")] + MissingAttribute(String), + + #[error("Invalid attribute value: {0}")] + InvalidAttribute(String), +} + +pub fn parse_items_xml>(path: P) -> Result, XmlParseError> { + let file = File::open(path)?; + let buf_reader = BufReader::new(file); + let mut reader = Reader::from_reader(buf_reader); + reader.config_mut().trim_text(true); + + let mut items = Vec::new(); + let mut buf = Vec::new(); + let mut current_item: Option = None; + + loop { + match reader.read_event_into(&mut buf) { + Ok(Event::Start(e)) | Ok(Event::Empty(e)) => { + match e.name().as_ref() { + b"item" => { + let attrs = parse_attributes(&e)?; + + // Get required attributes + let id = attrs.get("id") + .ok_or_else(|| XmlParseError::MissingAttribute("id".to_string()))? + .parse::() + .map_err(|_| XmlParseError::InvalidAttribute("id".to_string()))?; + + let name = attrs.get("name") + .ok_or_else(|| XmlParseError::MissingAttribute("name".to_string()))? + .clone(); + + let mut item = Item::new(id, name); + + // Parse optional attributes + if let Some(v) = attrs.get("level") { item.level = v.parse().ok(); } + if let Some(v) = attrs.get("description") { item.description = Some(v.clone()); } + if let Some(v) = attrs.get("price") { item.price = v.parse().ok(); } + if let Some(v) = attrs.get("slot") { item.slot = Some(v.clone()); } + if let Some(v) = attrs.get("category") { item.category = Some(v.clone()); } + if let Some(v) = attrs.get("skill") { item.skill = Some(v.clone()); } + if let Some(v) = attrs.get("tool") { item.tool = Some(v.clone()); } + if let Some(v) = attrs.get("stackable") { item.stackable = v.parse().ok(); } + if let Some(v) = attrs.get("maxstack") { item.maxstack = v.parse().ok(); } + if let Some(v) = attrs.get("abilityid") { item.abilityid = v.parse().ok(); } + if let Some(v) = attrs.get("swap") { item.swap = v.parse().ok(); } + if let Some(v) = attrs.get("twohanded") { item.twohanded = v.parse().ok(); } + if let Some(v) = attrs.get("foodamount") { item.foodamount = v.parse().ok(); } + if let Some(v) = attrs.get("foodfrequency") { item.foodfrequency = v.parse().ok(); } + if let Some(v) = attrs.get("foodtime") { item.foodtime = v.parse().ok(); } + if let Some(v) = attrs.get("foodlevel") { item.foodlevel = v.parse().ok(); } + if let Some(v) = attrs.get("craftingskill") { item.craftingskill = Some(v.clone()); } + if let Some(v) = attrs.get("workbench") { item.workbench = v.parse().ok(); } + if let Some(v) = attrs.get("craftingitems") { item.craftingitems = Some(v.clone()); } + if let Some(v) = attrs.get("handmodel") { item.handmodel = Some(v.clone()); } + if let Some(v) = attrs.get("groundmodel") { item.groundmodel = Some(v.clone()); } + if let Some(v) = attrs.get("usingitemmodel") { item.usingitemmodel = Some(v.clone()); } + if let Some(v) = attrs.get("dropsfx") { item.dropsfx = Some(v.clone()); } + if let Some(v) = attrs.get("pickupsfx") { item.pickupsfx = Some(v.clone()); } + if let Some(v) = attrs.get("hitgfx") { item.hitgfx = Some(v.clone()); } + if let Some(v) = attrs.get("attackanimations") { item.attackanimations = Some(v.clone()); } + if let Some(v) = attrs.get("attackanimationspeed") { item.attackanimationspeed = Some(v.clone()); } + if let Some(v) = attrs.get("attackhitsounds") { item.attackhitsounds = Some(v.clone()); } + if let Some(v) = attrs.get("storageitem") { item.storageitem = Some(v.clone()); } + if let Some(v) = attrs.get("storagesize") { item.storagesize = v.parse().ok(); } + if let Some(v) = attrs.get("hidemilestone") { item.hidemilestone = v.parse().ok(); } + if let Some(v) = attrs.get("generateicon") { item.generateicon = v.parse().ok(); } + if let Some(v) = attrs.get("comment") { item.comment = Some(v.clone()); } + + current_item = Some(item); + } + b"stat" => { + if let Some(ref mut item) = current_item { + let attrs = parse_attributes(&e)?; + let stat = parse_stat(&attrs); + item.stats.push(stat); + } + } + b"crafting" => { + if let Some(ref mut item) = current_item { + let attrs = parse_attributes(&e)?; + let recipe = CraftingRecipe { + workbench: attrs.get("workbench").and_then(|v| v.parse().ok()), + craftingitems: attrs.get("craftingitems").cloned(), + craftingskill: attrs.get("craftingskill").cloned(), + checks: attrs.get("checks").cloned(), + }; + item.crafting_recipes.push(recipe); + } + } + b"anim" => { + if let Some(ref mut item) = current_item { + let attrs = parse_attributes(&e)?; + let anim = AnimationSet { + idle: attrs.get("idle").cloned(), + walk: attrs.get("walk").cloned(), + run: attrs.get("run").cloned(), + weaponattack: attrs.get("weaponattack").cloned(), + takehit: attrs.get("takehit").cloned(), + }; + item.animations = Some(anim); + } + } + b"generate" => { + if let Some(ref mut item) = current_item { + let attrs = parse_attributes(&e)?; + let rule = GenerateRule { + generatestats: attrs.get("generatestats").cloned(), + generatecrafting: attrs.get("generatecrafting").and_then(|v| v.parse().ok()), + generateicon: attrs.get("generateicon").and_then(|v| v.parse().ok()), + }; + item.generate_rules.push(rule); + } + } + _ => {} + } + } + Ok(Event::End(e)) => { + match e.name().as_ref() { + b"item" => { + if let Some(item) = current_item.take() { + items.push(item); + } + } + _ => {} + } + } + Ok(Event::Eof) => break, + Err(e) => return Err(XmlParseError::XmlError(e)), + _ => {} + } + buf.clear(); + } + + Ok(items) +} + +fn parse_attributes(element: &quick_xml::events::BytesStart) -> Result, XmlParseError> { + let mut attrs = HashMap::new(); + + for attr in element.attributes() { + let attr = attr?; + let key = String::from_utf8_lossy(attr.key.as_ref()).to_string(); + let value = attr.unescape_value()?.to_string(); + attrs.insert(key, value); + } + + Ok(attrs) +} + +fn parse_stat(attrs: &HashMap) -> ItemStat { + ItemStat { + damagephysical: attrs.get("damagephysical").and_then(|v| v.parse().ok()), + damagemagical: attrs.get("damagemagical").and_then(|v| v.parse().ok()), + damageranged: attrs.get("damageranged").and_then(|v| v.parse().ok()), + accuracyphysical: attrs.get("accuracyphysical").and_then(|v| v.parse().ok()), + accuracymagical: attrs.get("accuracymagical").and_then(|v| v.parse().ok()), + accuracyranged: attrs.get("accuracyranged").and_then(|v| v.parse().ok()), + resistancephysical: attrs.get("resistancephysical").and_then(|v| v.parse().ok()), + resistancemagical: attrs.get("resistancemagical").and_then(|v| v.parse().ok()), + resistanceranged: attrs.get("resistanceranged").and_then(|v| v.parse().ok()), + health: attrs.get("health").and_then(|v| v.parse().ok()), + mana: attrs.get("mana").and_then(|v| v.parse().ok()), + manaregen: attrs.get("manaregen").and_then(|v| v.parse().ok()), + healing: attrs.get("healing").and_then(|v| v.parse().ok()), + harvestingspeedwoodcutting: attrs.get("harvestingspeedwoodcutting").and_then(|v| v.parse().ok()), + } +} diff --git a/unity-parser/src/types/component.rs b/unity-parser/src/types/component.rs index e55eb61..2ccd94d 100644 --- a/unity-parser/src/types/component.rs +++ b/unity-parser/src/types/component.rs @@ -139,6 +139,17 @@ pub mod yaml_helpers { .map(String::from) } + /// Get an interned string value from a YAML mapping + /// + /// This version interns the string for memory efficiency, + /// stripping Unity duplicate suffixes before interning. + /// Use this for GameObject names and other frequently duplicated strings. + pub fn get_interned_string(map: &Mapping, key: &str) -> Option> { + map.get(&Value::String(key.to_string())) + .and_then(|v| v.as_str()) + .map(|s| crate::types::intern_string(s)) + } + /// Get an i64 value from a YAML mapping pub fn get_i64(map: &Mapping, key: &str) -> Option { map.get(&Value::String(key.to_string())) diff --git a/unity-parser/src/types/mod.rs b/unity-parser/src/types/mod.rs index 34afc38..64dd34a 100644 --- a/unity-parser/src/types/mod.rs +++ b/unity-parser/src/types/mod.rs @@ -8,6 +8,7 @@ mod component; mod guid; mod ids; mod reference; +mod string_interner; mod type_filter; mod type_registry; mod unity_types; @@ -20,6 +21,7 @@ pub use component::{ pub use guid::Guid; pub use ids::{FileID, LocalID}; pub use reference::UnityReference; +pub use string_interner::intern_string; pub use type_filter::TypeFilter; pub use type_registry::{get_class_name, get_type_id}; pub use unity_types::{ diff --git a/unity-parser/src/types/string_interner.rs b/unity-parser/src/types/string_interner.rs new file mode 100644 index 0000000..7bf3ce1 --- /dev/null +++ b/unity-parser/src/types/string_interner.rs @@ -0,0 +1,233 @@ +//! String interning system for GameObject names +//! +//! This module provides a global string interner that deduplicates GameObject names +//! to reduce memory usage. It also cleans Unity-generated duplicate suffixes like +//! " copy", " (1)", " (2)", etc. + +use once_cell::sync::Lazy; +use regex::Regex; +use std::collections::HashSet; +use std::sync::{Arc, Mutex}; + +/// Global string interner for deduplicating GameObject names +pub struct StringInterner { + strings: Mutex>>, +} + +impl StringInterner { + /// Create a new string interner + fn new() -> Self { + Self { + strings: Mutex::new(HashSet::new()), + } + } + + /// Intern a string, cleaning Unity duplicate suffixes first + /// + /// This function: + /// 1. Removes Unity duplicate suffixes (" copy", " (1)", " (2)", etc.) + /// 2. Checks if the cleaned string is already interned + /// 3. Returns an Arc to the interned string + /// + /// # Examples + /// ``` + /// # use unity_parser::intern_string; + /// let s1 = intern_string("Player"); + /// let s2 = intern_string("Player (1)"); + /// let s3 = intern_string("Player copy"); + /// + /// // All three point to the same interned string + /// assert!(std::sync::Arc::ptr_eq(&s1, &s2)); + /// assert!(std::sync::Arc::ptr_eq(&s1, &s3)); + /// ``` + pub fn intern(&self, s: &str) -> Arc { + // Clean the suffix first + let cleaned = Self::clean_suffix(s); + + // Lock the interner + let mut strings = self.strings.lock().unwrap(); + + // Check if we already have this string + if let Some(existing) = strings.get(cleaned) { + return Arc::clone(existing); + } + + // Create a new interned string + let interned: Arc = Arc::from(cleaned); + strings.insert(Arc::clone(&interned)); + interned + } + + /// Clean Unity duplicate suffixes from a string + /// + /// Removes suffixes like: + /// - " copy" (case insensitive) + /// - " (1)", " (2)", " (123)", etc. + /// + /// Multiple suffixes are removed iteratively. + /// + /// # Examples + /// ```ignore + /// assert_eq!(clean_suffix("GameObject copy"), "GameObject"); + /// assert_eq!(clean_suffix("GameObject (1)"), "GameObject"); + /// assert_eq!(clean_suffix("GameObject (1) (2)"), "GameObject"); + /// ``` + fn clean_suffix(s: &str) -> &str { + static SUFFIX_REGEX: Lazy = Lazy::new(|| { + // Matches: + // - " copy" (case insensitive) + // - " (1)", " (2)", " (123)", etc. + // At the end of the string + Regex::new(r"(?i)\s+copy$|\s+\(\d+\)$").unwrap() + }); + + let mut cleaned = s; + + // Keep stripping suffixes until none remain + // Example: "GameObject (1) (2)" -> "GameObject" + while let Some(mat) = SUFFIX_REGEX.find(cleaned) { + cleaned = &cleaned[..mat.start()]; + } + + cleaned + } +} + +/// Global string interner instance +static STRING_INTERNER: Lazy = Lazy::new(StringInterner::new); + +/// Intern a string (public API) +/// +/// This is the main entry point for string interning. It cleans Unity duplicate +/// suffixes and returns an Arc to the interned string. +/// +/// # Examples +/// ``` +/// # use unity_parser::intern_string; +/// let name1 = intern_string("Player"); +/// let name2 = intern_string("Player (1)"); +/// +/// // Both names point to the same interned string +/// assert!(std::sync::Arc::ptr_eq(&name1, &name2)); +/// assert_eq!(name1.as_ref(), "Player"); +/// ``` +pub fn intern_string(s: &str) -> Arc { + STRING_INTERNER.intern(s) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_clean_suffix_copy() { + assert_eq!(StringInterner::clean_suffix("GameObject copy"), "GameObject"); + assert_eq!(StringInterner::clean_suffix("GameObject Copy"), "GameObject"); + assert_eq!(StringInterner::clean_suffix("GameObject COPY"), "GameObject"); + assert_eq!( + StringInterner::clean_suffix("GameObject copy"), + "GameObject" + ); + } + + #[test] + fn test_clean_suffix_numbered() { + assert_eq!(StringInterner::clean_suffix("GameObject (1)"), "GameObject"); + assert_eq!(StringInterner::clean_suffix("GameObject (2)"), "GameObject"); + assert_eq!( + StringInterner::clean_suffix("GameObject (123)"), + "GameObject" + ); + assert_eq!( + StringInterner::clean_suffix("GameObject (1)"), + "GameObject" + ); + } + + #[test] + fn test_clean_suffix_multiple() { + assert_eq!( + StringInterner::clean_suffix("GameObject (1) (2)"), + "GameObject" + ); + assert_eq!( + StringInterner::clean_suffix("GameObject copy (1)"), + "GameObject" + ); + assert_eq!( + StringInterner::clean_suffix("GameObject (1) copy"), + "GameObject" + ); + } + + #[test] + fn test_clean_suffix_no_match() { + assert_eq!(StringInterner::clean_suffix("GameObject"), "GameObject"); + assert_eq!(StringInterner::clean_suffix("copy"), "copy"); + assert_eq!(StringInterner::clean_suffix("(1)"), "(1)"); + assert_eq!( + StringInterner::clean_suffix("GameObject(1)"), + "GameObject(1)" + ); // No space + assert_eq!( + StringInterner::clean_suffix("GameObject copy suffix"), + "GameObject copy suffix" + ); // Not at end + } + + #[test] + fn test_intern_deduplication() { + let s1 = intern_string("TestString"); + let s2 = intern_string("TestString"); + + // Same Arc pointer (deduplicated) + assert!(Arc::ptr_eq(&s1, &s2)); + assert_eq!(s1.as_ref(), "TestString"); + } + + #[test] + fn test_intern_with_suffix_cleaning() { + let s1 = intern_string("GameObject"); + let s2 = intern_string("GameObject (1)"); + let s3 = intern_string("GameObject copy"); + + // All should point to the same interned string + assert!(Arc::ptr_eq(&s1, &s2)); + assert!(Arc::ptr_eq(&s1, &s3)); + assert_eq!(s1.as_ref(), "GameObject"); + } + + #[test] + fn test_intern_different_strings() { + let s1 = intern_string("GameObject1"); + let s2 = intern_string("GameObject2"); + + // Different pointers (different strings) + assert!(!Arc::ptr_eq(&s1, &s2)); + assert_eq!(s1.as_ref(), "GameObject1"); + assert_eq!(s2.as_ref(), "GameObject2"); + } + + #[test] + fn test_empty_string() { + let s = intern_string(""); + assert_eq!(s.as_ref(), ""); + } + + #[test] + fn test_multiple_suffixes_complex() { + let s = intern_string("Player (1) (2) copy (3)"); + assert_eq!(s.as_ref(), "Player"); + } + + #[test] + fn test_whitespace_variations() { + let s1 = intern_string("Object copy"); // Double space + let s2 = intern_string("Object copy"); // Single space + let s3 = intern_string("Object (1)"); // Triple space + + // All should be deduplicated to "Object" + assert!(Arc::ptr_eq(&s1, &s2)); + assert!(Arc::ptr_eq(&s1, &s3)); + } +} diff --git a/unity-parser/src/types/unity_types/game_object.rs b/unity-parser/src/types/unity_types/game_object.rs index afec620..bd37528 100644 --- a/unity-parser/src/types/unity_types/game_object.rs +++ b/unity-parser/src/types/unity_types/game_object.rs @@ -2,13 +2,14 @@ use crate::types::{yaml_helpers, ComponentContext, UnityComponent}; use sparsey::Entity; +use std::sync::Arc; /// A GameObject component /// /// GameObjects are the fundamental objects in Unity that represent entities in a scene. #[derive(Debug, Clone)] pub struct GameObject { - pub name: String, + pub name: Arc, pub is_active: bool, pub layer: i64, pub tag: i64, @@ -19,7 +20,8 @@ impl UnityComponent for GameObject { /// /// Note: Caller is responsible for ensuring this is called on the correct document type. fn parse(yaml: &serde_yaml::Mapping, _ctx: &ComponentContext) -> Option { - let name = yaml_helpers::get_string(yaml, "m_Name").unwrap_or_default(); + let name = yaml_helpers::get_interned_string(yaml, "m_Name") + .unwrap_or_else(|| Arc::from("")); let is_active = yaml_helpers::get_bool(yaml, "m_IsActive").unwrap_or(true); diff --git a/unity-parser/src/types/unity_types/prefab_instance.rs b/unity-parser/src/types/unity_types/prefab_instance.rs index 6efdecb..4de295f 100644 --- a/unity-parser/src/types/unity_types/prefab_instance.rs +++ b/unity-parser/src/types/unity_types/prefab_instance.rs @@ -811,7 +811,7 @@ mod tests { documents: Vec::new(), file_id_map: HashMap::new(), overrides: HashMap::new(), - next_file_id: i64::MAX, + next_file_id: Arc::new(Cell::new(i64::MAX)), source_path: PathBuf::from("test.prefab"), }; @@ -829,7 +829,7 @@ mod tests { documents: Vec::new(), file_id_map: HashMap::new(), overrides: HashMap::new(), - next_file_id: i64::MAX, + next_file_id: Arc::new(Cell::new(i64::MAX)), source_path: PathBuf::from("test.prefab"), };