Compare commits

...

3 Commits

Author SHA1 Message Date
Joseph T. Lyons
0982a59d66 Revert "gpui: Take advantage of unified memory on Apple silicon (#44273)" (#45022)
This reverts commit 2441dc3f66.

Release Notes:

- N/A
2025-12-16 14:52:46 -05:00
Nathan Sobo
6d4915df16 Revert "Optimize editor rendering when clipped by parent containers" (#45011)
This reverts commit 914b0117fb (#44995).

The optimization introduced a regression that causes the main thread to
hang for **100+ seconds** in certain scenarios, requiring a force quit
to recover.

## Analysis from spindump

When a large `AutoHeight` editor is displayed inside a `List` (e.g.,
Agent Panel thread view), the clipping calculation can produce invalid
row ranges:

1. `visible_bounds` from `window.content_mask().bounds` represents the
window's content mask, not the intersection with the editor
2. When the editor is partially scrolled out of view,
`clipped_top_in_lines` becomes extremely large
3. This causes `start_row` to be computed as an astronomically high
value
4. `blocks_in_range(start_row..end_row)` then spends excessive time in
`Cursor::search_forward` iterating through the block tree

The spindump showed **~46% of samples** (459/1001 over 10+ seconds)
stuck in `BlockSnapshot::blocks_in_range()`, specifically in cursor
iteration.

### Heaviest stack trace
```
EditorElement::prepaint
  └─ blocks_in_range + 236
       └─ Cursor::search_forward (459 samples)
```

## Symptoms

- Main thread unresponsive for 33-113 seconds before sampling even began
- UI completely frozen
- High CPU usage on main thread (10+ seconds of CPU time in the sample)
- Force quit required to recover

## Path forward

The original optimization goal (reducing line layout work for clipped
editors) is valid, but the implementation needs to:
1. Correctly calculate the **intersection** of editor bounds with the
visible viewport
2. Ensure row calculations stay within valid ranges (clamped to
`max_row`)
3. Handle edge cases where the editor is completely outside the visible
bounds

Release Notes:

- Fixed a hang that could occur when viewing large diffs in the Agent
Panel
2025-12-16 12:59:21 -05:00
Joseph T. Lyons
fcacef7324 v0.218.x preview 2025-12-16 11:56:48 -05:00
5 changed files with 20 additions and 77 deletions

View File

@@ -9164,15 +9164,6 @@ impl Element for EditorElement {
let height_in_lines = f64::from(bounds.size.height / line_height);
let max_row = snapshot.max_point().row().as_f64();
// Calculate how much of the editor is clipped by parent containers (e.g., List).
// This allows us to only render lines that are actually visible, which is
// critical for performance when large AutoHeight editors are inside Lists.
let visible_bounds = window.content_mask().bounds;
let clipped_top = (visible_bounds.origin.y - bounds.origin.y).max(px(0.));
let clipped_top_in_lines = f64::from(clipped_top / line_height);
let visible_height_in_lines =
f64::from(visible_bounds.size.height / line_height);
// The max scroll position for the top of the window
let max_scroll_top = if matches!(
snapshot.mode,
@@ -9229,14 +9220,10 @@ impl Element for EditorElement {
let mut scroll_position = snapshot.scroll_position();
// The scroll position is a fractional point, the whole number of which represents
// the top of the window in terms of display rows.
// We add clipped_top_in_lines to skip rows that are clipped by parent containers,
// but we don't modify scroll_position itself since the parent handles positioning.
let start_row =
DisplayRow((scroll_position.y + clipped_top_in_lines).floor() as u32);
let start_row = DisplayRow(scroll_position.y as u32);
let max_row = snapshot.max_point().row();
let end_row = cmp::min(
(scroll_position.y + clipped_top_in_lines + visible_height_in_lines).ceil()
as u32,
(scroll_position.y + height_in_lines).ceil() as u32,
max_row.next_row().0,
);
let end_row = DisplayRow(end_row);

View File

@@ -176,9 +176,11 @@ pub fn block_content_for_tests(
}
pub fn editor_content_with_blocks(editor: &Entity<Editor>, cx: &mut VisualTestContext) -> String {
let draw_size = size(px(3000.0), px(3000.0));
cx.simulate_resize(draw_size);
cx.draw(gpui::Point::default(), draw_size, |_, _| editor.clone());
cx.draw(
gpui::Point::default(),
size(px(3000.0), px(3000.0)),
|_, _| editor.clone(),
);
let (snapshot, mut lines, blocks) = editor.update_in(cx, |editor, window, cx| {
let snapshot = editor.snapshot(window, cx);
let text = editor.display_text(cx);

View File

@@ -15,9 +15,6 @@ pub(crate) struct MetalAtlas(Mutex<MetalAtlasState>);
impl MetalAtlas {
pub(crate) fn new(device: Device) -> Self {
MetalAtlas(Mutex::new(MetalAtlasState {
// Shared memory can be used only if CPU and GPU share the same memory space.
// https://developer.apple.com/documentation/metal/setting-resource-storage-modes
unified_memory: device.has_unified_memory(),
device: AssertSend(device),
monochrome_textures: Default::default(),
polychrome_textures: Default::default(),
@@ -32,7 +29,6 @@ impl MetalAtlas {
struct MetalAtlasState {
device: AssertSend<Device>,
unified_memory: bool,
monochrome_textures: AtlasTextureList<MetalAtlasTexture>,
polychrome_textures: AtlasTextureList<MetalAtlasTexture>,
tiles_by_key: FxHashMap<AtlasKey, AtlasTile>,
@@ -150,11 +146,6 @@ impl MetalAtlasState {
}
texture_descriptor.set_pixel_format(pixel_format);
texture_descriptor.set_usage(usage);
texture_descriptor.set_storage_mode(if self.unified_memory {
metal::MTLStorageMode::Shared
} else {
metal::MTLStorageMode::Managed
});
let metal_texture = self.device.new_texture(&texture_descriptor);
let texture_list = match kind {

View File

@@ -76,22 +76,12 @@ impl InstanceBufferPool {
self.buffers.clear();
}
pub(crate) fn acquire(
&mut self,
device: &metal::Device,
unified_memory: bool,
) -> InstanceBuffer {
pub(crate) fn acquire(&mut self, device: &metal::Device) -> InstanceBuffer {
let buffer = self.buffers.pop().unwrap_or_else(|| {
let options = if unified_memory {
MTLResourceOptions::StorageModeShared
// Buffers are write only which can benefit from the combined cache
// https://developer.apple.com/documentation/metal/mtlresourceoptions/cpucachemodewritecombined
| MTLResourceOptions::CPUCacheModeWriteCombined
} else {
MTLResourceOptions::StorageModeManaged
};
device.new_buffer(self.buffer_size as u64, options)
device.new_buffer(
self.buffer_size as u64,
MTLResourceOptions::StorageModeManaged,
)
});
InstanceBuffer {
metal_buffer: buffer,
@@ -109,7 +99,6 @@ impl InstanceBufferPool {
pub(crate) struct MetalRenderer {
device: metal::Device,
layer: metal::MetalLayer,
unified_memory: bool,
presents_with_transaction: bool,
command_queue: CommandQueue,
paths_rasterization_pipeline_state: metal::RenderPipelineState,
@@ -190,10 +179,6 @@ impl MetalRenderer {
output
}
// Shared memory can be used only if CPU and GPU share the same memory space.
// https://developer.apple.com/documentation/metal/setting-resource-storage-modes
let unified_memory = device.has_unified_memory();
let unit_vertices = [
to_float2_bits(point(0., 0.)),
to_float2_bits(point(1., 0.)),
@@ -205,12 +190,7 @@ impl MetalRenderer {
let unit_vertices = device.new_buffer_with_data(
unit_vertices.as_ptr() as *const c_void,
mem::size_of_val(&unit_vertices) as u64,
if unified_memory {
MTLResourceOptions::StorageModeShared
| MTLResourceOptions::CPUCacheModeWriteCombined
} else {
MTLResourceOptions::StorageModeManaged
},
MTLResourceOptions::StorageModeManaged,
);
let paths_rasterization_pipeline_state = build_path_rasterization_pipeline_state(
@@ -288,7 +268,6 @@ impl MetalRenderer {
device,
layer,
presents_with_transaction: false,
unified_memory,
command_queue,
paths_rasterization_pipeline_state,
path_sprites_pipeline_state,
@@ -358,23 +337,14 @@ impl MetalRenderer {
texture_descriptor.set_width(size.width.0 as u64);
texture_descriptor.set_height(size.height.0 as u64);
texture_descriptor.set_pixel_format(metal::MTLPixelFormat::BGRA8Unorm);
texture_descriptor.set_storage_mode(metal::MTLStorageMode::Private);
texture_descriptor
.set_usage(metal::MTLTextureUsage::RenderTarget | metal::MTLTextureUsage::ShaderRead);
self.path_intermediate_texture = Some(self.device.new_texture(&texture_descriptor));
if self.path_sample_count > 1 {
// https://developer.apple.com/documentation/metal/choosing-a-resource-storage-mode-for-apple-gpus
// Rendering MSAA textures are done in a single pass, so we can use memory-less storage on Apple Silicon
let storage_mode = if self.unified_memory {
metal::MTLStorageMode::Memoryless
} else {
metal::MTLStorageMode::Private
};
let mut msaa_descriptor = texture_descriptor;
msaa_descriptor.set_texture_type(metal::MTLTextureType::D2Multisample);
msaa_descriptor.set_storage_mode(storage_mode);
msaa_descriptor.set_storage_mode(metal::MTLStorageMode::Private);
msaa_descriptor.set_sample_count(self.path_sample_count as _);
self.path_intermediate_msaa_texture = Some(self.device.new_texture(&msaa_descriptor));
} else {
@@ -408,10 +378,7 @@ impl MetalRenderer {
};
loop {
let mut instance_buffer = self
.instance_buffer_pool
.lock()
.acquire(&self.device, self.unified_memory);
let mut instance_buffer = self.instance_buffer_pool.lock().acquire(&self.device);
let command_buffer =
self.draw_primitives(scene, &mut instance_buffer, drawable, viewport_size);
@@ -583,14 +550,10 @@ impl MetalRenderer {
command_encoder.end_encoding();
if !self.unified_memory {
// Sync the instance buffer to the GPU
instance_buffer.metal_buffer.did_modify_range(NSRange {
location: 0,
length: instance_offset as NSUInteger,
});
}
instance_buffer.metal_buffer.did_modify_range(NSRange {
location: 0,
length: instance_offset as NSUInteger,
});
Ok(command_buffer.to_owned())
}

View File

@@ -1 +1 @@
dev
preview