Files
zed/crates/language/Cargo.toml
Ichimura Tomoo 81463223d5 Support opening and saving files with legacy encodings (#44819)
## Summary

Addresses #16965

This PR adds support for **opening and saving** files with legacy
encodings (non-UTF-8).
Previously, Zed failed to open files encoded in Shift-JIS, EUC-JP, Big5,
etc., displaying a "Could not open file" error screen. This PR
implements automatic encoding detection upon opening and ensures the
original encoding is preserved when saving.

## Implementation Details

1.  **Worktree (Loading)**:
* Updated `load_file` to use `chardetng` for automatic encoding
detection.
* Files are decoded to UTF-8 internal strings for editing, while
preserving the detected `Encoding` metadata.
2.  **Language / Buffer**:
* Added an `encoding` field to the `Buffer` struct to store the detected
encoding.
3.  **Worktree (Saving)**:
    * Updated `write_file` to accept the stored encoding.
    * **Performance Optimization**:
* **UTF-8 Path**: Uses the existing optimized `fs.save` (streaming
chunks directly from Rope), ensuring no performance regression for the
vast majority of files.
* **Legacy Encoding Path**: Implemented a fallback that converts the
Rope to a contiguous `String/Bytes` in memory, re-encodes it to the
target format (e.g., Shift-JIS), and writes it to disk.
* *Note*: This fallback involves memory allocation, but it is necessary
to support legacy encodings without refactoring the `fs` crate's
streaming interfaces.

## Changes

- `crates/worktree`:
    - Add dependencies: `encoding_rs`, `chardetng`.
    - Update `load_file` to detect encoding and decode content.
    - Update `write_file` to handle re-encoding on save.
- `crates/language`: Add `encoding` field and accessors to `Buffer`.
- `crates/project`: Pass encoding information between Worktree and
Buffer.
- `crates/vim`: Update `:w` command to use the new `write_file`
signature.

## Verification

I validated this manually using a Rust script to generate test files
with various encodings.

**Results:**

*  **Success (Opened & Saved correctly):**
    * **Japanese:** `Shift-JIS` (CP932), `EUC-JP`, `ISO-2022-JP`
    * **Chinese:** `Big5` (Traditional), `GBK/GB2312` (Simplified)
* **Western/Unicode:** `Windows-1252` (CP1252), `UTF-16LE`, `UTF-16BE`
* ⚠️ **limitations (Detection accuracy):**
* Some specific encodings like `KOI8-R` or generic `Latin1` (ISO-8859-1)
may partially display replacement characters (`?`) depending on the file
content length. This is a known limitation of the heuristic detection
library (`chardetng`) rather than the saving logic.


Release Notes:

- Added support for opening and saving files with legacy encodings
(Shift-JIS, Big5, etc.)

---------

Co-authored-by: CrazyboyQCD <53971641+CrazyboyQCD@users.noreply.github.com>
Co-authored-by: Conrad Irwin <conrad.irwin@gmail.com>
2025-12-17 19:46:17 +00:00

101 lines
2.7 KiB
TOML

[package]
name = "language"
version = "0.1.0"
edition.workspace = true
publish.workspace = true
license = "GPL-3.0-or-later"
[lints]
workspace = true
[lib]
path = "src/language.rs"
doctest = false
[features]
test-support = [
"rand",
"collections/test-support",
"lsp/test-support",
"text/test-support",
"tree-sitter-rust",
"tree-sitter-python",
"tree-sitter-typescript",
"tree-sitter-md",
"settings/test-support",
"util/test-support",
]
[dependencies]
anyhow.workspace = true
async-trait.workspace = true
clock.workspace = true
collections.workspace = true
ec4rs.workspace = true
encoding_rs.workspace = true
fs.workspace = true
futures.workspace = true
fuzzy.workspace = true
globset.workspace = true
gpui.workspace = true
http_client.workspace = true
imara-diff.workspace = true
itertools.workspace = true
log.workspace = true
lsp.workspace = true
parking_lot.workspace = true
postage.workspace = true
rand = { workspace = true, optional = true }
regex.workspace = true
rpc.workspace = true
schemars.workspace = true
semver.workspace = true
serde.workspace = true
serde_json.workspace = true
settings.workspace = true
shellexpand.workspace = true
smallvec.workspace = true
smol.workspace = true
streaming-iterator.workspace = true
strsim.workspace = true
sum_tree.workspace = true
task.workspace = true
text.workspace = true
theme.workspace = true
tree-sitter-md = { workspace = true, optional = true }
tree-sitter-python = { workspace = true, optional = true }
tree-sitter-rust = { workspace = true, optional = true }
tree-sitter-typescript = { workspace = true, optional = true }
tree-sitter.workspace = true
unicase = "2.6"
util.workspace = true
watch.workspace = true
zlog.workspace = true
diffy = "0.4.2"
[dev-dependencies]
collections = { workspace = true, features = ["test-support"] }
ctor.workspace = true
gpui = { workspace = true, features = ["test-support"] }
indoc.workspace = true
lsp = { workspace = true, features = ["test-support"] }
pretty_assertions.workspace = true
rand.workspace = true
settings = { workspace = true, features = ["test-support"] }
text = { workspace = true, features = ["test-support"] }
http_client = { workspace = true, features = ["test-support"] }
tree-sitter-elixir.workspace = true
tree-sitter-embedded-template.workspace = true
tree-sitter-heex.workspace = true
tree-sitter-html.workspace = true
tree-sitter-json.workspace = true
tree-sitter-md.workspace = true
tree-sitter-python.workspace = true
tree-sitter-ruby.workspace = true
tree-sitter-rust.workspace = true
tree-sitter-typescript.workspace = true
toml.workspace = true
unindent.workspace = true
util = { workspace = true, features = ["test-support"] }
zlog.workspace = true