Compare commits

...

6 Commits

Author SHA1 Message Date
João Marcos
0d3633f270 wip: store brackets in a sumtree 2025-04-16 19:11:01 -03:00
João Marcos
ad0500b28a flatten expressions 2025-04-16 19:00:44 -03:00
João Marcos
9446fcbae6 minor tweaks 2025-04-16 19:00:44 -03:00
João Marcos
a2d09286d5 refac get_injections 2025-04-16 19:00:43 -03:00
João Marcos
ed42e849d2 refac reparse_with_ranges 2025-04-16 19:00:03 -03:00
João Marcos P. Bezerra
220c8f4958 flatten expression 2025-04-16 18:58:24 -03:00
4 changed files with 237 additions and 125 deletions

View File

@@ -4468,36 +4468,33 @@ impl<'a> Iterator for BufferChunks<'a> {
}
self.diagnostic_endpoints = diagnostic_endpoints;
if let Some(chunk) = self.chunks.peek() {
let chunk_start = self.range.start;
let mut chunk_end = (self.chunks.offset() + chunk.len())
.min(next_capture_start)
.min(next_diagnostic_endpoint);
let mut highlight_id = None;
if let Some(highlights) = self.highlights.as_ref() {
if let Some((parent_capture_end, parent_highlight_id)) = highlights.stack.last() {
chunk_end = chunk_end.min(*parent_capture_end);
highlight_id = Some(*parent_highlight_id);
}
}
let chunk = self.chunks.peek()?;
let slice =
&chunk[chunk_start - self.chunks.offset()..chunk_end - self.chunks.offset()];
self.range.start = chunk_end;
if self.range.start == self.chunks.offset() + chunk.len() {
self.chunks.next().unwrap();
let chunk_start = self.range.start;
let mut chunk_end = (self.chunks.offset() + chunk.len())
.min(next_capture_start)
.min(next_diagnostic_endpoint);
let mut highlight_id = None;
if let Some(highlights) = self.highlights.as_ref() {
if let Some((parent_capture_end, parent_highlight_id)) = highlights.stack.last() {
chunk_end = chunk_end.min(*parent_capture_end);
highlight_id = Some(*parent_highlight_id);
}
Some(Chunk {
text: slice,
syntax_highlight_id: highlight_id,
diagnostic_severity: self.current_diagnostic_severity(),
is_unnecessary: self.current_code_is_unnecessary(),
..Default::default()
})
} else {
None
}
let slice = &chunk[chunk_start - self.chunks.offset()..chunk_end - self.chunks.offset()];
self.range.start = chunk_end;
if self.range.start == self.chunks.offset() + chunk.len() {
self.chunks.next().unwrap();
}
Some(Chunk {
text: slice,
syntax_highlight_id: highlight_id,
diagnostic_severity: self.current_diagnostic_severity(),
is_unnecessary: self.current_code_is_unnecessary(),
..Default::default()
})
}
}

View File

@@ -92,6 +92,7 @@ struct SyntaxLayerEntry {
enum SyntaxLayerContent {
Parsed {
tree: tree_sitter::Tree,
brackets: SumTree<BracketItem>,
language: Arc<Language>,
},
Pending {
@@ -115,6 +116,72 @@ impl SyntaxLayerContent {
}
}
/// One run of text tracked by a syntax layer's bracket tree.
///
/// Every item covers `len` units of text and is one of: an opening bracket,
/// a closing bracket, or a stretch of text that is neither (`Isomorphic`).
/// Example, abbreviating the variants as `Iso` / `Open` / `Close`:
///
/// ```text
/// "fn main()  {   }"
/// [Iso(7), Open(1), Close(1), Iso(2), Open(1), Iso(3), Close(1)]
/// ```
///
/// Here `fn main` is 7 long, the two spaces before `{` are 2 long and the
/// three spaces inside the braces are 3 long.
// NOTE(review): `len` is presumably a byte count, since the tree is sought
// with byte offsets of changed ranges — confirm once items are actually built.
#[derive(Clone)]
enum BracketItem {
/// Text that is not a bracket; it leaves the nesting depth unchanged.
Isomorphic { len: usize },
/// An opening bracket; it raises the nesting depth by one.
OpenBracket { len: usize },
/// A closing bracket; it lowers the nesting depth by one.
CloseBracket { len: usize },
}
impl BracketItem {
    /// Returns the length of the run this item covers, regardless of which
    /// kind of item it is.
    pub fn len(&self) -> usize {
        // Every variant carries the same single `len` field, so one
        // or-pattern covers them all while staying exhaustive.
        match *self {
            Self::Isomorphic { len }
            | Self::OpenBracket { len }
            | Self::CloseBracket { len } => len,
        }
    }
}
/// Aggregated information about a run of consecutive [`BracketItem`]s.
///
/// The derived `Default` (zero length, zero depth change) is what the
/// `sum_tree::Summary` impl uses as its empty value.
#[derive(Clone, Default)]
struct BracketSummary {
/// Total length of the summarized items, i.e. the sum of each item's `len`.
len: usize,
/// The change in depth that happened inside this summary: each
/// `OpenBracket` contributes +1, each `CloseBracket` -1, and `Isomorphic`
/// text 0. Signed because a run may close more brackets than it opens.
depth_diff: i32,
}
impl sum_tree::Summary for BracketSummary {
    type Context = ();

    /// The empty summary: no length and no net change in depth.
    fn zero(_: &()) -> Self {
        Self {
            len: 0,
            depth_diff: 0,
        }
    }

    /// Folds `summary` into `self` by adding the two fields component-wise.
    fn add_summary(&mut self, summary: &Self, _: &()) {
        let Self { len, depth_diff } = summary;
        self.len += *len;
        self.depth_diff += *depth_diff;
    }
}
impl sum_tree::Item for BracketItem {
    type Summary = BracketSummary;

    /// Summarizes a single item: its own length, plus the depth change it
    /// causes (+1 for an opening bracket, -1 for a closing one, 0 otherwise).
    fn summary(&self, _: &()) -> Self::Summary {
        let len = self.len();
        match self {
            Self::Isomorphic { .. } => BracketSummary { len, depth_diff: 0 },
            Self::OpenBracket { .. } => BracketSummary { len, depth_diff: 1 },
            Self::CloseBracket { .. } => BracketSummary { len, depth_diff: -1 },
        }
    }
}
impl<'a> sum_tree::Dimension<'a, BracketSummary> for usize {
fn zero(_cx: &()) -> Self {
0
}
fn add_summary(&mut self, summary: &'a BracketSummary, _: &()) {
*self += summary.len;
}
}
/// A layer of syntax highlighting, corresponding to a single syntax
/// tree in a particular language.
#[derive(Debug)]
@@ -448,7 +515,6 @@ impl SyntaxSnapshot {
let mut changed_regions = ChangeRegionSet::default();
let mut queue = BinaryHeap::new();
let mut combined_injection_ranges = HashMap::default();
queue.push(ParseStep {
depth: 0,
language: ParseStepLanguage::Loaded {
@@ -546,12 +612,13 @@ impl SyntaxSnapshot {
}
let content = match step.language {
ParseStepLanguage::Pending { name } => SyntaxLayerContent::Pending {
language_name: name,
},
ParseStepLanguage::Loaded { language } => {
let Some(grammar) = language.grammar() else {
continue;
};
let tree;
let changed_ranges;
let mut included_ranges = step.included_ranges;
for range in &mut included_ranges {
@@ -565,7 +632,14 @@ impl SyntaxSnapshot {
.to_ts_point();
}
if let Some((SyntaxLayerContent::Parsed { tree: old_tree, .. }, layer_start)) =
let (old_tree, mut brackets) = if let Some((
SyntaxLayerContent::Parsed {
tree: old_tree,
brackets,
..
},
layer_start,
)) =
old_layer.map(|layer| (&layer.content, layer.range.start))
{
log::trace!(
@@ -601,12 +675,7 @@ impl SyntaxSnapshot {
}
if included_ranges.is_empty() {
included_ranges.push(tree_sitter::Range {
start_byte: 0,
end_byte: 0,
start_point: Default::default(),
end_point: Default::default(),
});
included_ranges.push(zeroed_tree_sitter_range());
}
log::trace!(
@@ -616,32 +685,7 @@ impl SyntaxSnapshot {
LogIncludedRanges(&included_ranges),
);
let result = parse_text(
grammar,
text.as_rope(),
step_start_byte,
included_ranges,
Some(old_tree.clone()),
);
match result {
Ok(t) => tree = t,
Err(e) => {
log::error!("error parsing text: {:?}", e);
continue;
}
};
changed_ranges = join_ranges(
invalidated_ranges
.iter()
.filter(|&range| {
range.start <= step_end_byte && range.end >= step_start_byte
})
.cloned(),
old_tree.changed_ranges(&tree).map(|r| {
step_start_byte + r.start_byte..step_start_byte + r.end_byte
}),
);
(Some(old_tree), brackets.clone())
} else {
if matches!(step.mode, ParseMode::Combined { .. }) {
insert_newlines_between_ranges(
@@ -654,12 +698,7 @@ impl SyntaxSnapshot {
}
if included_ranges.is_empty() {
included_ranges.push(tree_sitter::Range {
start_byte: 0,
end_byte: 0,
start_point: Default::default(),
end_point: Default::default(),
});
included_ranges.push(zeroed_tree_sitter_range());
}
log::trace!(
@@ -669,58 +708,89 @@ impl SyntaxSnapshot {
LogIncludedRanges(&included_ranges),
);
let result = parse_text(
grammar,
text.as_rope(),
step_start_byte,
included_ranges,
None,
);
match result {
Ok(t) => tree = t,
Err(e) => {
log::error!("error parsing text: {:?}", e);
continue;
}
};
changed_ranges = vec![step_start_byte..step_end_byte];
}
(None, SumTree::new(&()))
};
if let (Some((config, registry)), false) = (
grammar.injection_config.as_ref().zip(registry.as_ref()),
changed_ranges.is_empty(),
) {
for range in &changed_ranges {
changed_regions.insert(
ChangedRegion {
let result = parse_text(
grammar,
text.as_rope(),
step_start_byte,
included_ranges,
old_tree.cloned(),
);
let tree = match result {
Ok(inner) => inner,
Err(e) => {
log::error!("error parsing text: {:?}", e);
continue;
}
};
let changed_ranges = if let Some(old_tree) = old_tree {
join_ranges(
invalidated_ranges
.iter()
.filter(|&range| {
range.start <= step_end_byte && range.end >= step_start_byte
})
.cloned(),
old_tree.changed_ranges(&tree).map(|r| {
step_start_byte + r.start_byte..step_start_byte + r.end_byte
}),
)
} else {
vec![step_start_byte..step_end_byte]
};
// re-run queries if something changed
if !changed_ranges.is_empty() {
if let Some((config, registry)) =
grammar.injection_config.as_ref().zip(registry.as_ref())
{
for range in &changed_ranges {
let region = ChangedRegion {
depth: step.depth + 1,
range: text.anchor_before(range.start)
..text.anchor_after(range.end),
},
};
changed_regions.insert(region, text);
}
update_injection_parse_steps(
config,
text,
step.range.clone(),
tree.root_node_with_offset(
step_start_byte,
step_start_point.to_ts_point(),
),
registry,
step.depth + 1,
&changed_ranges,
&mut queue,
);
}
if let Some(config) = grammar.brackets_config.as_ref() {
update_brackets_in_range(
config,
text,
tree.root_node_with_offset(
step_start_byte,
step_start_point.to_ts_point(),
),
&changed_ranges,
&mut brackets,
);
}
get_injections(
config,
text,
step.range.clone(),
tree.root_node_with_offset(
step_start_byte,
step_start_point.to_ts_point(),
),
registry,
step.depth + 1,
&changed_ranges,
&mut combined_injection_ranges,
&mut queue,
);
}
SyntaxLayerContent::Parsed { tree, language }
SyntaxLayerContent::Parsed {
tree,
language,
brackets,
}
}
ParseStepLanguage::Pending { name } => SyntaxLayerContent::Pending {
language_name: name,
},
};
layers.push(
@@ -866,7 +936,7 @@ impl SyntaxSnapshot {
iter::from_fn(move || {
while let Some(layer) = cursor.item() {
let mut info = None;
if let SyntaxLayerContent::Parsed { tree, language } = &layer.content {
if let SyntaxLayerContent::Parsed { tree, language, .. } = &layer.content {
let layer_start_offset = layer.range.start.to_offset(buffer);
let layer_start_point = layer.range.start.to_point(buffer).to_ts_point();
if include_hidden || !language.config.hidden {
@@ -1023,6 +1093,7 @@ impl<'a> SyntaxMapCaptures<'a> {
pub struct TreeSitterOptions {
max_start_depth: Option<u32>,
}
impl TreeSitterOptions {
pub fn max_start_depth(max_start_depth: u32) -> Self {
Self {
@@ -1250,7 +1321,8 @@ fn parse_text(
})
}
fn get_injections(
#[allow(clippy::too_many_arguments)]
fn update_injection_parse_steps(
config: &InjectionConfig,
text: &BufferSnapshot,
outer_range: Range<Anchor>,
@@ -1258,15 +1330,16 @@ fn get_injections(
language_registry: &Arc<LanguageRegistry>,
depth: usize,
changed_ranges: &[Range<usize>],
combined_injection_ranges: &mut HashMap<LanguageId, (Arc<Language>, Vec<tree_sitter::Range>)>,
queue: &mut BinaryHeap<ParseStep>,
) {
let mut query_cursor = QueryCursorHandle::new();
let mut prev_match = None;
// Ensure that a `ParseStep` is created for every combined injection language, even
// if there currently no matches for that injection.
combined_injection_ranges.clear();
// Note: a `ParseStep` must be created for every combined injection language, even
// if there are currently no matches for that injection.
let mut combined_injection_ranges =
HashMap::<LanguageId, (Arc<Language>, Vec<tree_sitter::Range>)>::default();
for pattern in &config.patterns {
if let (Some(language_name), true) = (pattern.language.as_ref(), pattern.combined) {
if let Some(language) = language_registry
@@ -1290,7 +1363,6 @@ fn get_injections(
if content_ranges.is_empty() {
continue;
}
let content_range =
content_ranges.first().unwrap().start_byte..content_ranges.last().unwrap().end_byte;
@@ -1381,6 +1453,46 @@ fn get_injections(
}
}
/// Work in progress: refreshes `brackets_tree` for every range in
/// `changed_ranges`.
///
/// For each changed range this runs the bracket query (`config.query`) on
/// `node` over that range widened by one byte on each side, then splices a
/// freshly built subtree into `brackets_tree` in place of the items between
/// the range's start and end.
///
/// Current state of the implementation, as written:
/// - nothing is ever pushed into the new subtree, so the splice only removes
///   items from the changed span;
/// - the first query match that has both an `@open` and an `@close` capture
///   hits `todo!()` and panics.
///
/// # Panics
///
/// Panics via `todo!()` as soon as a complete bracket match is found.
// NOTE(review): the caller passes `changed_ranges` as buffer offsets of the
// re-parsed text, while `brackets_tree` is the tree cloned from the previous
// layer. Confirm both use the same coordinate space (buffer-absolute vs.
// layer-relative, pre-edit vs. post-edit) before relying on this splice.
fn update_brackets_in_range(
config: &BracketConfig,
text: &BufferSnapshot,
node: Node,
changed_ranges: &[Range<usize>],
brackets_tree: &mut SumTree<BracketItem>,
) {
// One query cursor is reused for all ranges; only its byte range changes.
let mut query_cursor = QueryCursorHandle::new();
for changed_range in changed_ranges {
// Replacement items for this range. Still empty when it gets spliced in
// below, because the match loop does not produce any items yet.
let mut new_bracket_subtree = SumTree::new(&());
// TODO: is this saturating_sub necessary?
// If the one-byte widening is kept, yes: `start` is a `usize`, so a plain
// `- 1` would underflow for a range starting at offset 0.
// NOTE(review): the `+ 1` may reach past the end of the text when the range
// ends at the last byte — confirm the query cursor tolerates that.
query_cursor.set_byte_range(changed_range.start.saturating_sub(1)..changed_range.end + 1);
for mat in query_cursor.matches(&config.query, node, TextProvider(text.as_rope())) {
// Only the first node of each capture is considered.
let open_capture = mat.nodes_for_capture_index(config.open_capture_ix).next();
let close_capture = mat.nodes_for_capture_index(config.close_capture_ix).next();
// A match missing either capture is logged and skipped.
let Some((open_capture, close_capture)) = open_capture.zip(close_capture) else {
log::warn!("couldn't find @open and @close captures in brackets pattern");
continue;
};
// Unfinished: the captured pair is not recorded anywhere yet.
todo!("how do I insert this into the SumTree, or, should I put it in a BTreeSet");
// cursor.see
}
// Rebuild the tree as: items before the changed range, then the new
// subtree, then items after the changed range.
let mut cursor = brackets_tree.cursor::<usize>(&());
let mut left_side = cursor.slice(&changed_range.start, Bias::Left, &());
cursor.seek(&changed_range.end, Bias::Right, &());
let right_side = cursor.suffix(&());
left_side.append(new_bracket_subtree, &());
left_side.append(right_side, &());
// The cursor borrows `brackets_tree`, so it has to be dropped before the
// tree can be overwritten.
drop(cursor); // make the borrow checker happy
*brackets_tree = left_side;
}
}
/// Updates the given list of included `ranges`, removing any ranges that intersect
/// `removed_ranges`, and inserting the given `new_ranges`.
///
@@ -1903,3 +2015,12 @@ impl fmt::Debug for LogPoint {
(self.0.row, self.0.column).fmt(f)
}
}
/// Builds an empty `tree_sitter::Range`: both byte offsets are 0 and both
/// points are the default `tree_sitter::Point`.
///
/// Used as the single placeholder entry when a layer would otherwise have no
/// included ranges.
fn zeroed_tree_sitter_range() -> tree_sitter::Range {
    // `Point` is `Copy`, so the same default value serves as both endpoints.
    let origin = tree_sitter::Point::default();
    tree_sitter::Range {
        start_byte: 0,
        end_byte: 0,
        start_point: origin,
        end_point: origin,
    }
}

View File

@@ -1216,7 +1216,7 @@ impl TextDimension for TextSummary {
impl<'a> sum_tree::Dimension<'a, ChunkSummary> for usize {
fn zero(_cx: &()) -> Self {
Default::default()
0
}
fn add_summary(&mut self, summary: &'a ChunkSummary, _: &()) {

View File

@@ -372,13 +372,7 @@ where
"Must call `seek`, `next` or `prev` before calling this method"
);
}
}
impl<'a, T, D> Cursor<'a, T, D>
where
T: Item,
D: Dimension<'a, T::Summary>,
{
#[track_caller]
pub fn seek<Target>(
&mut self,