terminal: Sanitize URLs with characters that cannot be last (#43559)
Closes #43345
The list of characters comes from the linkify crate, which is already
used for URL detection in the editor:
5239e12e26/src/url.rs (L228)
Release Notes:
- Improved url links detection in terminals.
---------
Signed-off-by: Marco Mihai Condrache <52580954+marcocondrache@users.noreply.github.com>
This commit is contained in:
committed by
GitHub
parent
dfdad947e1
commit
b17b097204
@@ -160,8 +160,8 @@ fn sanitize_url_punctuation<T: EventListener>(
|
||||
let mut sanitized_url = url;
|
||||
let mut chars_trimmed = 0;
|
||||
|
||||
// First, handle parentheses balancing using single traversal
|
||||
let (open_parens, close_parens) =
|
||||
// Count parentheses in the URL
|
||||
let (open_parens, mut close_parens) =
|
||||
sanitized_url
|
||||
.chars()
|
||||
.fold((0, 0), |(opens, closes), c| match c {
|
||||
@@ -170,33 +170,27 @@ fn sanitize_url_punctuation<T: EventListener>(
|
||||
_ => (opens, closes),
|
||||
});
|
||||
|
||||
// Trim unbalanced closing parentheses
|
||||
if close_parens > open_parens {
|
||||
let mut remaining_close = close_parens;
|
||||
while sanitized_url.ends_with(')') && remaining_close > open_parens {
|
||||
sanitized_url.pop();
|
||||
chars_trimmed += 1;
|
||||
remaining_close -= 1;
|
||||
}
|
||||
}
|
||||
|
||||
// Handle trailing periods
|
||||
if sanitized_url.ends_with('.') {
|
||||
let trailing_periods = sanitized_url
|
||||
.chars()
|
||||
.rev()
|
||||
.take_while(|&c| c == '.')
|
||||
.count();
|
||||
|
||||
if trailing_periods > 1 {
|
||||
sanitized_url.truncate(sanitized_url.len() - trailing_periods);
|
||||
chars_trimmed += trailing_periods;
|
||||
} else if trailing_periods == 1
|
||||
&& let Some(second_last_char) = sanitized_url.chars().rev().nth(1)
|
||||
&& (second_last_char.is_alphanumeric() || second_last_char == '/')
|
||||
{
|
||||
// Remove trailing characters that shouldn't be at the end of URLs
|
||||
while let Some(last_char) = sanitized_url.chars().last() {
|
||||
let should_remove = match last_char {
|
||||
// These may be part of a URL but not at the end. It's not that the spec
|
||||
// doesn't allow them, but they are frequently used in plain text as delimiters
|
||||
// where they're not meant to be part of the URL.
|
||||
'.' | ',' | ':' | ';' => true,
|
||||
'(' => true,
|
||||
')' if close_parens > open_parens => {
|
||||
close_parens -= 1;
|
||||
|
||||
true
|
||||
}
|
||||
_ => false,
|
||||
};
|
||||
|
||||
if should_remove {
|
||||
sanitized_url.pop();
|
||||
chars_trimmed += 1;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -413,6 +407,8 @@ mod tests {
|
||||
("https://www.google.com/)", "https://www.google.com/"),
|
||||
("https://example.com/path)", "https://example.com/path"),
|
||||
("https://test.com/))", "https://test.com/"),
|
||||
("https://test.com/(((", "https://test.com/"),
|
||||
("https://test.com/(test)(", "https://test.com/(test)"),
|
||||
// Cases that should NOT be sanitized (balanced parentheses)
|
||||
(
|
||||
"https://en.wikipedia.org/wiki/Example_(disambiguation)",
|
||||
@@ -443,10 +439,10 @@ mod tests {
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_url_periods_sanitization() {
|
||||
// Test URLs with trailing periods (sentence punctuation)
|
||||
fn test_url_punctuation_sanitization() {
|
||||
// Test URLs with trailing punctuation (sentence/text punctuation)
|
||||
// The sanitize_url_punctuation function removes ., ,, :, ;, from the end
|
||||
let test_cases = vec![
|
||||
// Cases that should be sanitized (trailing periods likely punctuation)
|
||||
("https://example.com.", "https://example.com"),
|
||||
(
|
||||
"https://github.com/zed-industries/zed.",
|
||||
@@ -466,13 +462,36 @@ mod tests {
|
||||
"https://en.wikipedia.org/wiki/C.E.O.",
|
||||
"https://en.wikipedia.org/wiki/C.E.O",
|
||||
),
|
||||
// Cases that should NOT be sanitized (periods are part of URL structure)
|
||||
("https://example.com,", "https://example.com"),
|
||||
("https://example.com/path,", "https://example.com/path"),
|
||||
("https://example.com,,", "https://example.com"),
|
||||
("https://example.com:", "https://example.com"),
|
||||
("https://example.com/path:", "https://example.com/path"),
|
||||
("https://example.com::", "https://example.com"),
|
||||
("https://example.com;", "https://example.com"),
|
||||
("https://example.com/path;", "https://example.com/path"),
|
||||
("https://example.com;;", "https://example.com"),
|
||||
("https://example.com.,", "https://example.com"),
|
||||
("https://example.com.:;", "https://example.com"),
|
||||
("https://example.com!.", "https://example.com!"),
|
||||
("https://example.com/).", "https://example.com/"),
|
||||
("https://example.com/);", "https://example.com/"),
|
||||
("https://example.com/;)", "https://example.com/"),
|
||||
(
|
||||
"https://example.com/v1.0/api",
|
||||
"https://example.com/v1.0/api",
|
||||
),
|
||||
("https://192.168.1.1", "https://192.168.1.1"),
|
||||
("https://sub.domain.com", "https://sub.domain.com"),
|
||||
(
|
||||
"https://example.com?query=value",
|
||||
"https://example.com?query=value",
|
||||
),
|
||||
("https://example.com?a=1&b=2", "https://example.com?a=1&b=2"),
|
||||
(
|
||||
"https://example.com/path:8080",
|
||||
"https://example.com/path:8080",
|
||||
),
|
||||
];
|
||||
|
||||
for (input, expected) in test_cases {
|
||||
@@ -484,7 +503,6 @@ mod tests {
|
||||
let end_point = AlacPoint::new(Line(0), Column(input.len()));
|
||||
let dummy_match = Match::new(start_point, end_point);
|
||||
|
||||
// This test should initially fail since we haven't implemented period sanitization yet
|
||||
let (result, _) = sanitize_url_punctuation(input.to_string(), dummy_match, &term);
|
||||
assert_eq!(result, expected, "Failed for input: {}", input);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user