Start on a SumTree-based Rope implementation

This commit is contained in:
Antonio Scandurra
2021-05-14 11:33:39 +02:00
parent 9aeb35bfab
commit e48973f75a
3 changed files with 340 additions and 0 deletions

View File

@@ -1,5 +1,6 @@
mod anchor;
mod point;
mod rope;
mod selection;
pub use anchor::*;

View File

@@ -0,0 +1,295 @@
use super::Point;
use crate::sum_tree::{self, SeekBias, SumTree};
use arrayvec::ArrayString;
use std::{cmp, ops::Range, str};
#[cfg(test)]
const CHUNK_BASE: usize = 2;
#[cfg(not(test))]
const CHUNK_BASE: usize = 8;
#[derive(Clone, Default, Debug)]
pub struct Rope {
chunks: SumTree<Chunk>,
}
impl Rope {
pub fn new() -> Self {
Self::default()
}
pub fn append(&mut self, rope: Rope) {
self.chunks.push_tree(rope.chunks, &());
}
pub fn push(&mut self, mut text: &str) {
let mut suffix = ArrayString::<[_; 2 * CHUNK_BASE]>::new();
self.chunks.with_last_mut(
|chunk| {
if chunk.0.len() + text.len() <= 2 * CHUNK_BASE {
chunk.0.push_str(text);
text = "";
} else {
let mut append_len = CHUNK_BASE.saturating_sub(chunk.0.len());
while !text.is_char_boundary(append_len) {
append_len -= 1;
}
if append_len > 0 {
let split = text.split_at(append_len);
chunk.0.push_str(split.0);
text = split.1;
} else {
let mut take_len = CHUNK_BASE;
while !chunk.0.is_char_boundary(take_len) {
take_len -= 1;
}
let split = chunk.0.split_at(take_len);
suffix.push_str(split.1);
chunk.0 = ArrayString::from(split.0).unwrap();
}
}
},
&(),
);
let mut chunks = vec![];
let mut chunk = ArrayString::new();
for ch in suffix.chars().chain(text.chars()) {
if chunk.len() + ch.len_utf8() > CHUNK_BASE {
chunks.push(Chunk(chunk));
chunk = ArrayString::new();
}
chunk.push(ch);
}
if !chunk.is_empty() {
chunks.push(Chunk(chunk));
}
self.chunks.extend(chunks, &());
}
pub fn slice(&self, range: Range<usize>) -> Rope {
let mut slice = Rope::new();
let mut cursor = self.chunks.cursor::<usize, usize>();
cursor.slice(&range.start, SeekBias::Left, &());
if let Some(start_chunk) = cursor.item() {
let start_ix = range.start - cursor.start();
let end_ix = cmp::min(range.end, cursor.end()) - cursor.start();
slice.push(&start_chunk.0[start_ix..end_ix]);
}
if range.end > cursor.end() {
cursor.next();
slice.append(Rope {
chunks: cursor.slice(&range.end, SeekBias::Left, &()),
});
if let Some(end_chunk) = cursor.item() {
slice.push(&end_chunk.0[..range.end - cursor.start()]);
}
}
slice
}
pub fn summary(&self) -> TextSummary {
self.chunks.summary()
}
pub fn chars(&self) -> Chars {
self.chars_at(0)
}
pub fn chars_at(&self, start: usize) -> Chars {
Chars::new(self, start)
}
fn text(&self) -> String {
let mut text = String::new();
for chunk in self.chunks.cursor::<(), ()>() {
text.push_str(&chunk.0);
}
text
}
}
impl<'a> From<&'a str> for Rope {
fn from(text: &'a str) -> Self {
let mut rope = Self::new();
rope.push(text);
rope
}
}
#[derive(Clone, Debug, Default)]
struct Chunk(ArrayString<[u8; 2 * CHUNK_BASE]>);
impl sum_tree::Item for Chunk {
type Summary = TextSummary;
fn summary(&self) -> Self::Summary {
let mut chars = 0;
let mut bytes = 0;
let mut lines = Point::new(0, 0);
let mut first_line_len = 0;
let mut rightmost_point = Point::new(0, 0);
for c in self.0.chars() {
chars += 1;
bytes += c.len_utf8();
if c == '\n' {
lines.row += 1;
lines.column = 0;
} else {
lines.column += 1;
if lines.row == 0 {
first_line_len = lines.column;
}
if lines.column > rightmost_point.column {
rightmost_point = lines;
}
}
}
TextSummary {
chars,
bytes,
lines,
first_line_len,
rightmost_point,
}
}
}
#[derive(Clone, Debug, Default, Eq, PartialEq)]
pub struct TextSummary {
pub chars: usize,
pub bytes: usize,
pub lines: Point,
pub first_line_len: u32,
pub rightmost_point: Point,
}
impl sum_tree::Summary for TextSummary {
type Context = ();
fn add_summary(&mut self, summary: &Self, _: &Self::Context) {
*self += summary;
}
}
impl<'a> std::ops::AddAssign<&'a Self> for TextSummary {
fn add_assign(&mut self, other: &'a Self) {
let joined_line_len = self.lines.column + other.first_line_len;
if joined_line_len > self.rightmost_point.column {
self.rightmost_point = Point::new(self.lines.row, joined_line_len);
}
if other.rightmost_point.column > self.rightmost_point.column {
self.rightmost_point = self.lines + &other.rightmost_point;
}
if self.lines.row == 0 {
self.first_line_len += other.first_line_len;
}
self.chars += other.chars;
self.bytes += other.bytes;
self.lines += &other.lines;
}
}
impl std::ops::AddAssign<Self> for TextSummary {
fn add_assign(&mut self, other: Self) {
*self += &other;
}
}
impl<'a> sum_tree::Dimension<'a, TextSummary> for usize {
fn add_summary(&mut self, summary: &'a TextSummary) {
*self += summary.chars;
}
}
pub struct Chars<'a> {
cursor: sum_tree::Cursor<'a, Chunk, usize, usize>,
chars: str::Chars<'a>,
}
impl<'a> Chars<'a> {
pub fn new(rope: &'a Rope, start: usize) -> Self {
let mut cursor = rope.chunks.cursor::<usize, usize>();
cursor.slice(&start, SeekBias::Left, &());
let chunk = cursor.item().expect("invalid index");
let chars = chunk.0[start - cursor.start()..].chars();
Self { cursor, chars }
}
}
impl<'a> Iterator for Chars<'a> {
type Item = char;
fn next(&mut self) -> Option<Self::Item> {
if let Some(ch) = self.chars.next() {
Some(ch)
} else if let Some(chunk) = self.cursor.item() {
self.chars = chunk.0.chars();
self.cursor.next();
Some(self.chars.next().unwrap())
} else {
None
}
}
}
#[cfg(test)]
mod tests {
use crate::util::RandomCharIter;
use super::*;
use rand::prelude::*;
use std::env;
#[test]
fn test_random() {
let iterations = env::var("ITERATIONS")
.map(|i| i.parse().expect("invalid `ITERATIONS` variable"))
.unwrap_or(100);
let operations = env::var("OPERATIONS")
.map(|i| i.parse().expect("invalid `OPERATIONS` variable"))
.unwrap_or(10);
let seed_range = if let Ok(seed) = env::var("SEED") {
let seed = seed.parse().expect("invalid `SEED` variable");
seed..seed + 1
} else {
0..iterations
};
for seed in seed_range {
dbg!(seed);
let mut rng = StdRng::seed_from_u64(seed);
let mut expected = String::new();
let mut actual = Rope::new();
for _ in 0..operations {
let end_ix = rng.gen_range(0..=expected.len());
let start_ix = rng.gen_range(0..=end_ix);
let len = rng.gen_range(0..=5);
let new_text: String = RandomCharIter::new(&mut rng).take(len).collect();
let mut new_actual = Rope::new();
new_actual.append(actual.slice(0..start_ix));
new_actual.push(&new_text);
new_actual.append(actual.slice(end_ix..actual.summary().chars));
actual = new_actual;
let mut new_expected = String::new();
new_expected.push_str(&expected[..start_ix]);
new_expected.push_str(&new_text);
new_expected.push_str(&expected[end_ix..]);
expected = new_expected;
assert_eq!(actual.text(), expected);
}
}
}
}

View File

@@ -101,6 +101,50 @@ impl<T: Item> SumTree<T> {
self.rightmost_leaf().0.items().last()
}
pub fn with_last_mut(
&mut self,
f: impl FnOnce(&mut T),
ctx: &<T::Summary as Summary>::Context,
) {
self.with_last_mut_recursive(f, ctx);
}
fn with_last_mut_recursive(
&mut self,
f: impl FnOnce(&mut T),
ctx: &<T::Summary as Summary>::Context,
) -> Option<T::Summary> {
match Arc::make_mut(&mut self.0) {
Node::Internal {
summary,
child_summaries,
child_trees,
..
} => {
let last_summary = child_summaries.last_mut().unwrap();
let last_child = child_trees.last_mut().unwrap();
*last_summary = last_child.with_last_mut_recursive(f, ctx).unwrap();
*summary = sum(child_summaries.iter(), ctx);
Some(summary.clone())
}
Node::Leaf {
summary,
items,
item_summaries,
} => {
if let Some((item, item_summary)) = items.last_mut().zip(item_summaries.last_mut())
{
(f)(item);
*item_summary = item.summary();
*summary = sum(item_summaries.iter(), ctx);
Some(summary.clone())
} else {
None
}
}
}
}
pub fn extent<'a, D: Dimension<'a, T::Summary>>(&'a self) -> D {
let mut extent = D::default();
match self.0.as_ref() {