Compare commits

...

6 Commits

Author SHA1 Message Date
Richard Feldman
5324ef894e Use Pratt Parsing for shell commands 2025-03-08 00:42:59 -05:00
Richard Feldman
b1fca741a9 wip 2025-03-08 00:41:17 -05:00
Richard Feldman
fb51b99198 Make tests avoid the non-space options 2025-03-08 00:41:17 -05:00
Richard Feldman
cb79ee20c7 Make sandboxed_shell work except for operators without spaces 2025-03-08 00:41:17 -05:00
Richard Feldman
c5f0a5bb3e wip 2025-03-08 00:41:17 -05:00
Richard Feldman
38136cb0c0 Add sandboxed_shell 2025-03-08 00:41:17 -05:00
6 changed files with 1135 additions and 19 deletions

23
Cargo.lock generated
View File

@@ -2300,7 +2300,7 @@ dependencies = [
"cap-primitives",
"cap-std",
"io-lifetimes",
"windows-sys 0.52.0",
"windows-sys 0.59.0",
]
[[package]]
@@ -2328,7 +2328,7 @@ dependencies = [
"ipnet",
"maybe-owned",
"rustix",
"windows-sys 0.52.0",
"windows-sys 0.59.0",
"winx",
]
@@ -4402,7 +4402,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "33d852cb9b869c2a9b3df2f71a3074817f01e1844f839a144f5fcef059a4eb5d"
dependencies = [
"libc",
"windows-sys 0.52.0",
"windows-sys 0.59.0",
]
[[package]]
@@ -5064,7 +5064,7 @@ checksum = "5e2e6123af26f0f2c51cc66869137080199406754903cc926a7690401ce09cb4"
dependencies = [
"io-lifetimes",
"rustix",
"windows-sys 0.52.0",
"windows-sys 0.59.0",
]
[[package]]
@@ -6709,7 +6709,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2285ddfe3054097ef4b2fe909ef8c3bcd1ea52a8f0d274416caebeef39f04a65"
dependencies = [
"io-lifetimes",
"windows-sys 0.52.0",
"windows-sys 0.59.0",
]
[[package]]
@@ -10734,7 +10734,7 @@ dependencies = [
"once_cell",
"socket2",
"tracing",
"windows-sys 0.52.0",
"windows-sys 0.59.0",
]
[[package]]
@@ -11656,7 +11656,7 @@ dependencies = [
"libc",
"linux-raw-sys",
"once_cell",
"windows-sys 0.52.0",
"windows-sys 0.59.0",
]
[[package]]
@@ -11927,6 +11927,7 @@ dependencies = [
"serde",
"serde_json",
"settings",
"shlex",
"util",
]
@@ -13426,7 +13427,7 @@ dependencies = [
"fd-lock",
"io-lifetimes",
"rustix",
"windows-sys 0.52.0",
"windows-sys 0.59.0",
"winx",
]
@@ -13566,7 +13567,7 @@ dependencies = [
"getrandom 0.3.1",
"once_cell",
"rustix",
"windows-sys 0.52.0",
"windows-sys 0.59.0",
]
[[package]]
@@ -15912,7 +15913,7 @@ version = "0.1.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb"
dependencies = [
"windows-sys 0.52.0",
"windows-sys 0.59.0",
]
[[package]]
@@ -16377,7 +16378,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3f3fd376f71958b862e7afb20cfe5a22830e1963462f3a17f49d82a6c1d1f42d"
dependencies = [
"bitflags 2.8.0",
"windows-sys 0.52.0",
"windows-sys 0.59.0",
]
[[package]]

View File

@@ -16,6 +16,7 @@ doctest = false
anyhow.workspace = true
assistant_tool.workspace = true
collections.workspace = true
shlex.workspace = true
futures.workspace = true
gpui.workspace = true
mlua.workspace = true

View File

@@ -1,9 +1,7 @@
---@diagnostic disable: undefined-global
-- Create a sandbox environment
local sandbox = {}
-- Allow access to standard libraries (safe subset)
sandbox.string = string
sandbox.table = table
sandbox.math = math
@@ -15,24 +13,19 @@ sandbox.pairs = pairs
sandbox.ipairs = ipairs
sandbox.search = search
-- Create a sandboxed version of LuaFileIO
local io = {}
-- File functions
io.open = sb_io_open
io.popen = sb_io_popen
-- Add the sandboxed io library to the sandbox environment
sandbox.io = io
-- Load the script with the sandbox environment
local user_script_fn, err = load(user_script, nil, "t", sandbox)
if not user_script_fn then
error("Failed to load user script: " .. tostring(err))
end
-- Execute the user script within the sandbox
local success, result = pcall(user_script_fn)
if not success then

View File

@@ -0,0 +1,713 @@
/// Models will commonly generate POSIX shell one-liner commands which
/// they run via io.popen() in Lua. Instead of giving those shell command
/// strings to the operating system - which is a security risk, and
/// which can easily fail on Windows, since Windows doesn't do POSIX - we
/// parse the shell command ourselves and translate it into a sequence of
/// commands in our normal sandbox. Essentially, this is an extremely
/// minimalistic shell which Lua popen() commands can execute in.
///
/// Our shell supports:
/// - Basic commands and args
/// - The operators `|`, `&&`, `;`, `>`, `1>`, `2>`, `&>`, `>&`
///
/// The operators currently have to have whitespace around them because the
/// `shlex` crate we use to tokenize the strings does not treat operators
/// as word boundaries, even though shells do. Fortunately, LLMs consistently
/// generate spaces around these operators anyway.
use mlua::{Error, Result};
/// A single command invocation parsed from a shell string.
///
/// Equality is total (every field is a string or a collection of strings),
/// so the type derives `Eq` in addition to `PartialEq`.
#[derive(Debug, Clone, PartialEq, Eq, Default)]
pub struct ShellCmd {
    /// The first word of the command, i.e. the program to run.
    pub command: String,
    /// The words following the program name, in order.
    pub args: Vec<String>,
    /// Target of a stdout redirection (`>`, `1>`, `&>` or `>&`), if any.
    pub stdout_redirect: Option<String>,
    /// Target of a stderr redirection (`2>`, `&>` or `>&`), if any.
    pub stderr_redirect: Option<String>,
}
/// A binary operator joining two commands in a shell string.
///
/// The enum is fieldless, so it is `Copy` and has total equality (`Eq`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Operator {
    /// The `|` shell operator (highest precedence)
    Pipe,
    /// The `&&` shell operator (medium precedence)
    And,
    /// The `;` shell operator (lowest precedence)
    Semicolon,
}

impl Operator {
    /// Binding strength of the operator; a larger value binds tighter.
    fn precedence(&self) -> u8 {
        match self {
            Operator::Pipe => 3,
            Operator::And => 2,
            Operator::Semicolon => 1,
        }
    }
}
/// Abstract syntax tree for a parsed shell string: either a single command
/// or two sub-trees joined by an operator.
#[derive(Debug, Clone, PartialEq)]
pub enum ShellAst {
/// A single command with its arguments and redirections.
Command(ShellCmd),
/// Two sub-trees joined by a binary operator.
Operation {
/// The operator joining `left` and `right`.
operator: Operator,
/// The sub-tree on the left-hand side of the operator.
left: Box<ShellAst>,
/// The sub-tree on the right-hand side of the operator.
right: Box<ShellAst>,
},
}
impl ShellAst {
    /// Parse a shell string and build an abstract syntax tree.
    ///
    /// # Errors
    ///
    /// Returns `Error::RuntimeError` when:
    /// - the string contains any of `$`, `` ` ``, `(`, `)`, `{` or `}`
    ///   (checked on the raw string, so this also applies inside quotes);
    /// - the string ends inside a quotation or right after a backslash;
    /// - the parser rejects the token stream (e.g. a missing command).
    pub fn parse(string: impl AsRef<str>) -> Result<Self> {
        let string = string.as_ref();

        // Check for unsupported shell features
        let has_unsupported_syntax = string
            .chars()
            .any(|c| matches!(c, '$' | '`' | '(' | ')' | '{' | '}'));
        if has_unsupported_syntax {
            return Err(Error::RuntimeError(
                "Complex shell features (subshells, variables, backgrounding, etc.) are not available in this shell."
                    .to_string(),
            ));
        }

        let mut parser = ShellParser::new(string);
        let ast = parser.parse_expression(0);

        // `shlex` cannot report errors through the `Iterator` interface: on an
        // unterminated quote or a trailing backslash it drops the last token,
        // ends the iteration and sets `had_error`. Without this check such
        // input would be silently truncated and executed.
        if parser.lexer.had_error {
            return Err(Error::RuntimeError(
                "Unterminated quote or trailing backslash in shell command".to_string(),
            ));
        }

        ast
    }
}
/// Which output stream(s) a redirection operator applies to.
enum Redirect {
/// Standard output only (`>` or `1>`).
Stdout,
/// Standard error only (`2>`).
Stderr,
/// Both standard output and standard error (`&>` or `>&`).
Both,
}
/// Pratt parser over the words produced by `shlex`, with one token of lookahead.
struct ShellParser<'a> {
    /// Tokenizer that splits the input into shell words.
    lexer: shlex::Shlex<'a>,
    /// The next unconsumed token, or `None` once the input is exhausted.
    current_token: Option<String>,
}

impl<'a> ShellParser<'a> {
    /// Creates a parser over `input` and primes the one-token lookahead.
    fn new(input: &'a str) -> Self {
        let mut lexer = shlex::Shlex::new(input);
        let current_token = lexer.next();
        Self {
            lexer,
            current_token,
        }
    }

    /// Replaces the lookahead with the next token from the lexer.
    fn advance(&mut self) {
        self.current_token = self.lexer.next();
    }

    /// Returns the lookahead token without consuming it.
    fn peek(&self) -> Option<&str> {
        self.current_token.as_deref()
    }

    /// Parses a command followed by any operators whose precedence is at
    /// least `min_precedence`, building a left-associative tree.
    fn parse_expression(&mut self, min_precedence: u8) -> Result<ShellAst> {
        // Parse the first command or atom
        let mut left = ShellAst::Command(self.parse_command()?);

        // While we have operators with sufficient precedence, keep building the tree
        while let Some(op) = self
            .parse_operator()
            .filter(|op| op.precedence() >= min_precedence)
        {
            // Consume the operator token
            self.advance();

            // Special case for trailing semicolons - if we have no more tokens,
            // we don't need to parse another command
            if op == Operator::Semicolon && self.peek().is_none() {
                break;
            }

            // For left-associative operators the right side must bind
            // strictly tighter, hence `op.precedence() + 1`.
            let right = self.parse_expression(op.precedence() + 1)?;

            left = ShellAst::Operation {
                operator: op,
                left: Box::new(left),
                right: Box::new(right),
            };
        }

        Ok(left)
    }

    /// Interprets the lookahead token as a binary operator, if it is one.
    /// Does not consume the token.
    fn parse_operator(&self) -> Option<Operator> {
        match self.peek()? {
            "|" => Some(Operator::Pipe),
            "&&" => Some(Operator::And),
            ";" => Some(Operator::Semicolon),
            _ => None,
        }
    }

    /// Consumes the lookahead token as a word, splitting off trailing `;`.
    ///
    /// `shlex` does not treat `;` as a word boundary, so `a;` arrives as one
    /// token. When semicolons are stripped, a synthetic `;` becomes the new
    /// lookahead and the lexer is deliberately NOT advanced: the token after
    /// `a;` is still inside the lexer and will be fetched once the `;` is
    /// consumed. (Advancing first and then overwriting the lookahead would
    /// silently drop that following token.)
    ///
    /// Returns `None` when the input is exhausted.
    fn take_word(&mut self) -> Option<String> {
        let mut word = self.current_token.take()?;
        let stripped_len = word.trim_end_matches(';').len();
        if stripped_len == word.len() {
            self.advance();
        } else {
            word.truncate(stripped_len);
            self.current_token = Some(";".to_string());
        }
        Some(word)
    }

    /// Consumes a redirection operator and its target, recording the target
    /// on `cmd` for the stream(s) selected by `redirect`.
    ///
    /// # Errors
    ///
    /// Returns `Error::RuntimeError` if the operator is followed by the end
    /// of input, by a `|`, `&&` or `;` operator, or by an empty word.
    fn handle_redirection(&mut self, cmd: &mut ShellCmd, redirect: Redirect) -> Result<()> {
        self.advance(); // consume the redirection operator

        // An operator directly after the redirection means the target is
        // missing; don't swallow the operator as a file name.
        let target = if self.parse_operator().is_some() {
            None
        } else {
            // `take_word` also strips a trailing `;` from e.g. `> out.txt;`.
            self.take_word().filter(|word| !word.is_empty())
        };
        let target = target.ok_or_else(|| {
            Error::RuntimeError("Missing redirection target in shell".to_string())
        })?;

        match redirect {
            Redirect::Stdout => cmd.stdout_redirect = Some(target),
            Redirect::Stderr => cmd.stderr_redirect = Some(target),
            Redirect::Both => {
                cmd.stdout_redirect = Some(target.clone());
                cmd.stderr_redirect = Some(target);
            }
        }

        Ok(())
    }

    /// Parses one command: its name, arguments and redirections, stopping at
    /// the next `|`, `&&` or `;` operator or at the end of input.
    ///
    /// # Errors
    ///
    /// Returns `Error::RuntimeError` for a bare `&` (backgrounding), for a
    /// redirection without a target, or when no command name was found.
    fn parse_command(&mut self) -> Result<ShellCmd> {
        let mut cmd = ShellCmd::default();

        // Process tokens until we hit an operator or end of input
        loop {
            // Classify the lookahead first; the borrow from `peek()` ends
            // with this `match`, so `self` can be mutated below.
            let redirect = match self.peek() {
                None | Some("|" | "&&" | ";") => break,
                Some(">" | "1>") => Some(Redirect::Stdout),
                Some("2>") => Some(Redirect::Stderr),
                Some("&>" | ">&") => Some(Redirect::Both),
                Some("&") => {
                    // Reject ampersand as it's used for backgrounding processes
                    return Err(Error::RuntimeError(
                        "Background processes (using &) are not available in this shell."
                            .to_string(),
                    ));
                }
                Some(_) => None,
            };

            match redirect {
                Some(redirect) => self.handle_redirection(&mut cmd, redirect)?,
                None => {
                    // It's either the command name or an argument. If the
                    // word carried a trailing `;`, the lookahead is now `;`
                    // and the next iteration breaks out of the loop.
                    let Some(word) = self.take_word() else {
                        break;
                    };
                    if cmd.command.is_empty() {
                        cmd.command = word;
                    } else {
                        cmd.args.push(word);
                    }
                }
            }
        }

        if cmd.command.is_empty() {
            return Err(Error::RuntimeError(
                "Missing command to run in shell".to_string(),
            ));
        }

        Ok(cmd)
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `input`, panicking with the offending input and the parser
    /// error on failure. (`expect("... {cmd:?}")` does not interpolate, so
    /// the message is built with `panic!` instead.)
    fn parse_ok(input: &str) -> ShellAst {
        ShellAst::parse(input)
            .unwrap_or_else(|err| panic!("parsing failed for {input:?}: {err}"))
    }

    /// Builds a command node with the given redirections.
    fn redirected(
        command: &str,
        args: &[&str],
        stdout_redirect: Option<&str>,
        stderr_redirect: Option<&str>,
    ) -> ShellAst {
        ShellAst::Command(ShellCmd {
            command: command.to_string(),
            args: args.iter().map(|arg| arg.to_string()).collect(),
            stdout_redirect: stdout_redirect.map(str::to_string),
            stderr_redirect: stderr_redirect.map(str::to_string),
        })
    }

    /// Builds a command node without redirections.
    fn command(name: &str, args: &[&str]) -> ShellAst {
        redirected(name, args, None, None)
    }

    /// Builds an operation node.
    fn operation(operator: Operator, left: ShellAst, right: ShellAst) -> ShellAst {
        ShellAst::Operation {
            operator,
            left: Box::new(left),
            right: Box::new(right),
        }
    }

    #[test]
    fn test_simple_command() {
        // Basic command with no args or operators
        assert_eq!(parse_ok("ls"), command("ls", &[]));
    }

    #[test]
    fn test_command_with_args() {
        // Command with arguments
        assert_eq!(parse_ok("ls -la /home"), command("ls", &["-la", "/home"]));
    }

    #[test]
    fn test_simple_pipe() {
        // Test pipe operator
        assert_eq!(
            parse_ok("ls -l | grep txt"),
            operation(
                Operator::Pipe,
                command("ls", &["-l"]),
                command("grep", &["txt"]),
            )
        );
    }

    #[test]
    fn test_simple_and() {
        // Test && operator
        assert_eq!(
            parse_ok("mkdir test && cd test"),
            operation(
                Operator::And,
                command("mkdir", &["test"]),
                command("cd", &["test"]),
            )
        );
    }

    #[test]
    fn test_complex_chain_with_precedence() {
        // The tree should be structured with precedence:
        // - Pipe has highest precedence
        // - Then And
        // - Then Semicolon (lowest)
        assert_eq!(
            parse_ok("echo hello | grep e && ls -l ; echo done"),
            operation(
                Operator::Semicolon,
                operation(
                    Operator::And,
                    operation(
                        Operator::Pipe,
                        command("echo", &["hello"]),
                        command("grep", &["e"]),
                    ),
                    command("ls", &["-l"]),
                ),
                command("echo", &["done"]),
            )
        );
    }

    #[test]
    fn test_stdout_redirection() {
        // Test stdout redirection
        assert_eq!(
            parse_ok("echo hello > output.txt"),
            redirected("echo", &["hello"], Some("output.txt"), None)
        );
    }

    #[test]
    fn test_stderr_redirection() {
        // Test stderr redirection
        assert_eq!(
            parse_ok("find / -name test 2> errors.log"),
            redirected("find", &["/", "-name", "test"], None, Some("errors.log"))
        );
    }

    #[test]
    fn test_both_redirections() {
        // Test both stdout and stderr redirection, in both spellings
        let expected = redirected("make", &[], Some("build.log"), Some("build.log"));
        assert_eq!(parse_ok("make &> build.log"), expected);
        assert_eq!(parse_ok("make >& build.log"), expected);
    }

    #[test]
    fn test_multiple_operators() {
        // Test multiple operators in a single command
        let cmd =
            "find . -name \"*.rs\" | grep impl && echo \"Found implementations\" ; echo \"Done\"";
        assert_eq!(
            parse_ok(cmd),
            operation(
                Operator::Semicolon,
                operation(
                    Operator::And,
                    operation(
                        Operator::Pipe,
                        command("find", &[".", "-name", "*.rs"]),
                        command("grep", &["impl"]),
                    ),
                    command("echo", &["Found implementations"]),
                ),
                command("echo", &["Done"]),
            )
        );
    }

    #[test]
    fn test_pipe_with_redirections() {
        // Redirections bind to the command they follow, not to the pipeline
        assert_eq!(
            parse_ok("cat file.txt | grep error > results.txt 2> errors.log"),
            operation(
                Operator::Pipe,
                command("cat", &["file.txt"]),
                redirected(
                    "grep",
                    &["error"],
                    Some("results.txt"),
                    Some("errors.log"),
                ),
            )
        );
    }

    #[test]
    fn test_quoted_arguments() {
        // Quoted arguments keep their inner whitespace and lose their quotes
        assert_eq!(
            parse_ok("echo \"hello world\" | grep \"o w\""),
            operation(
                Operator::Pipe,
                command("echo", &["hello world"]),
                command("grep", &["o w"]),
            )
        );
    }

    #[test]
    fn test_unsupported_features() {
        // Test unsupported shell features
        for input in [
            "echo $HOME",
            "echo `date`",
            "echo $(date)",
            "for i in {1..5}; do echo $i; done",
        ] {
            assert!(
                ShellAst::parse(input).is_err(),
                "expected {input:?} to be rejected"
            );
        }
    }

    #[test]
    fn test_complex_command() {
        // A trailing semicolon glued to a quoted argument is stripped
        let cmd = "find /path/to/dir -type f -name \"*.txt\" -exec grep \"pattern with spaces\";";
        assert_eq!(
            parse_ok(cmd),
            command(
                "find",
                &[
                    "/path/to/dir",
                    "-type",
                    "f",
                    "-name",
                    "*.txt",
                    "-exec",
                    "grep",
                    "pattern with spaces",
                ],
            )
        );
    }

    #[test]
    fn test_empty_command() {
        // Test empty command
        assert!(ShellAst::parse("").is_err());
    }

    #[test]
    fn test_missing_redirection_target() {
        // Test missing redirection target
        assert!(ShellAst::parse("echo hello >").is_err());
        assert!(ShellAst::parse("ls 2>").is_err());
    }

    #[test]
    fn test_ampersand_as_argument() {
        // Test & as a background operator is not allowed, and that the
        // error message mentions background processes
        match ShellAst::parse("grep & file.txt") {
            Err(Error::RuntimeError(msg)) => assert!(
                msg.contains("Background processes"),
                "unexpected error message: {msg}"
            ),
            other => panic!("Expected RuntimeError about background processes, got {other:?}"),
        }
    }
}

View File

@@ -1,3 +1,4 @@
mod sandboxed_shell;
mod session;
use project::Project;

View File

@@ -11,11 +11,15 @@ use project::{search::SearchQuery, Fs, Project};
use regex::Regex;
use std::{
cell::RefCell,
fs::File,
path::{Path, PathBuf},
process::{Command, Stdio},
sync::Arc,
};
use util::{paths::PathMatcher, ResultExt};
use crate::sandboxed_shell::{Operator, ShellAst, ShellCmd};
/// Output of a script, exposed as text in the `stdout` field.
// NOTE(review): presumably the captured standard output of an executed
// script — confirm against the code that constructs this value.
pub struct ScriptOutput {
pub stdout: String,
}
@@ -96,6 +100,16 @@ impl Session {
}
})?,
)?;
globals.set(
"sb_io_popen",
lua.create_function({
move |lua, shell_str| {
let mut allowed_commands = HashMap::default(); // TODO persist this
Self::io_popen(&lua, root_dir.as_ref(), shell_str, &mut allowed_commands)
}
})?,
)?;
globals.set("user_script", script)?;
lua.load(SANDBOX_PREAMBLE).exec_async().await?;
@@ -126,6 +140,399 @@ impl Session {
Ok(())
}
/// Sandboxed io.popen() function in Lua.
fn io_popen(
lua: &Lua,
root_dir: Option<&Arc<Path>>,
shell_str: mlua::String,
allowed_commands: &mut HashMap<String, bool>,
) -> mlua::Result<(Option<Table>, String)> {
let root_dir = root_dir.ok_or_else(|| {
mlua::Error::runtime("cannot execute command without a root directory")
})?;
// Parse the shell command into our AST
let ast = ShellAst::parse(shell_str.to_str()?)?;
// Create a lua file handle for the command output
let file = lua.create_table()?;
// Create a buffer to store the command output
let output_buffer = Arc::new(Mutex::new(String::new()));
// Execute the shell command based on the parsed AST
match ast {
ShellAst::Command(shell_cmd) => {
let result = Self::execute_command(&shell_cmd, root_dir, allowed_commands)?;
output_buffer.lock().push_str(&result);
}
ShellAst::Operation {
operator,
left,
right,
} => {
// Handle compound operations by recursively executing them
let left_output = Self::execute_ast_node(*left, root_dir, allowed_commands)?;
match operator {
Operator::Pipe => {
// For pipe, use left output as input to right command
let right_output = Self::execute_ast_node_with_input(
*right,
&left_output,
root_dir,
allowed_commands,
)?;
output_buffer.lock().push_str(&right_output);
}
Operator::And => {
// For AND, only execute right if left was successful (non-empty output as success indicator)
if !left_output.trim().is_empty() {
let right_output =
Self::execute_ast_node(*right, root_dir, allowed_commands)?;
output_buffer.lock().push_str(&right_output);
} else {
output_buffer.lock().push_str(&left_output);
}
}
Operator::Semicolon => {
// For semicolon, execute both regardless of result
output_buffer.lock().push_str(&left_output);
let right_output =
Self::execute_ast_node(*right, root_dir, allowed_commands)?;
output_buffer.lock().push_str(&right_output);
}
}
}
}
// Set up the file's content
file.set(
"__content",
lua.create_userdata(FileContent(RefCell::new(
output_buffer.lock().as_bytes().to_vec(),
)))?,
)?;
file.set("__position", 0usize)?;
file.set("__read_perm", true)?;
file.set("__write_perm", false)?;
// Implement the read method for the file
let read_fn = {
lua.create_function(
move |_lua, (file_userdata, format): (mlua::Table, Option<mlua::Value>)| {
let content = file_userdata.get::<mlua::AnyUserData>("__content")?;
let mut position = file_userdata.get::<usize>("__position")?;
let content_ref = content.borrow::<FileContent>()?;
let content_vec = content_ref.0.borrow();
if position >= content_vec.len() {
return Ok(None); // EOF
}
match format {
Some(mlua::Value::String(s)) => {
let format_str = s.to_string_lossy();
// Handle different read formats
if format_str.starts_with("*a") {
// Read all
let result =
String::from_utf8_lossy(&content_vec[position..]).to_string();
position = content_vec.len();
file_userdata.set("__position", position)?;
Ok(Some(result))
} else if format_str.starts_with("*l") {
// Read line
let mut line = Vec::new();
let mut found_newline = false;
while position < content_vec.len() {
let byte = content_vec[position];
position += 1;
if byte == b'\n' {
found_newline = true;
break;
}
// Handle \r\n sequence
if byte == b'\r'
&& position < content_vec.len()
&& content_vec[position] == b'\n'
{
position += 1;
found_newline = true;
break;
}
line.push(byte);
}
file_userdata.set("__position", position)?;
if !found_newline
&& line.is_empty()
&& position >= content_vec.len()
{
return Ok(None); // EOF
}
let result = String::from_utf8_lossy(&line).to_string();
Ok(Some(result))
} else {
Err(mlua::Error::runtime(format!(
"Unsupported read format: {}",
format_str
)))
}
}
Some(_) => Err(mlua::Error::runtime("Invalid format")),
None => {
// Default is to read a line
let mut line = Vec::new();
let mut found_newline = false;
while position < content_vec.len() {
let byte = content_vec[position];
position += 1;
if byte == b'\n' {
found_newline = true;
break;
}
if byte == b'\r'
&& position < content_vec.len()
&& content_vec[position] == b'\n'
{
position += 1;
found_newline = true;
break;
}
line.push(byte);
}
file_userdata.set("__position", position)?;
if !found_newline && line.is_empty() && position >= content_vec.len() {
return Ok(None); // EOF
}
let result = String::from_utf8_lossy(&line).to_string();
Ok(Some(result))
}
}
},
)?
};
file.set("read", read_fn)?;
// Implement close method
let close_fn = lua.create_function(|_lua, _: mlua::Table| Ok(true))?;
file.set("close", close_fn)?;
Ok((Some(file), String::new()))
}
// Helper function to execute a single command
fn execute_command(
cmd: &ShellCmd,
root_dir: &Arc<Path>,
allowed_commands: &mut HashMap<String, bool>,
) -> mlua::Result<String> {
// Check if command is allowed
if !allowed_commands.contains_key(&cmd.command) {
// If it's the first time we see this command, ask for permission
// In a real application, this would prompt the user, but for simplicity
// we'll just allow all commands in this sample implementation
allowed_commands.insert(cmd.command.clone(), true);
}
if !allowed_commands[&cmd.command] {
return Err(mlua::Error::runtime(format!(
"Command '{}' is not allowed in this sandbox",
cmd.command
)));
}
// Execute the command
let mut command = Command::new(&cmd.command);
// Set the current directory
command.current_dir(root_dir);
// Add arguments
command.args(&cmd.args);
// Configure stdio
command.stdin(Stdio::piped());
command.stdout(Stdio::piped());
command.stderr(Stdio::piped());
// Execute the command
let output = command
.output()
.map_err(|e| mlua::Error::runtime(format!("Failed to execute command: {}", e)))?;
let mut result = String::new();
// Handle stdout
if cmd.stdout_redirect.is_none() {
result.push_str(&String::from_utf8_lossy(&output.stdout));
} else {
// Handle file redirection
let redirect_path = root_dir.join(cmd.stdout_redirect.as_ref().unwrap());
Self::write_to_file(&redirect_path, &output.stdout)
.map_err(|e| mlua::Error::runtime(format!("Failed to redirect stdout: {}", e)))?;
}
// Handle stderr
if cmd.stderr_redirect.is_none() {
// If stderr is not redirected, append it to the result
result.push_str(&String::from_utf8_lossy(&output.stderr));
} else {
// Handle file redirection
let redirect_path = root_dir.join(cmd.stderr_redirect.as_ref().unwrap());
Self::write_to_file(&redirect_path, &output.stderr)
.map_err(|e| mlua::Error::runtime(format!("Failed to redirect stderr: {}", e)))?;
}
Ok(result)
}
/// Creates the file at `path` (truncating it if it already exists) and
/// writes all of `data` into it.
fn write_to_file(path: &Path, data: &[u8]) -> std::io::Result<()> {
    File::create(path).and_then(|mut file| std::io::Write::write_all(&mut file, data))
}
/// Executes an AST node and returns the output it produced.
///
/// - A command is run via `Self::execute_command`.
/// - `left | right` feeds the output of `left` to `right` as stdin and
///   returns the output of `right`.
/// - `left && right` returns the output of `left`, followed by the output
///   of `right` if `right` was run.
/// - `left ; right` runs both and returns both outputs in order.
///
/// # Errors
///
/// Propagates any error from executing a sub-node.
fn execute_ast_node(
    node: ShellAst,
    root_dir: &Arc<Path>,
    allowed_commands: &mut HashMap<String, bool>,
) -> mlua::Result<String> {
    let (operator, left, right) = match node {
        ShellAst::Command(cmd) => {
            return Self::execute_command(&cmd, root_dir, allowed_commands);
        }
        ShellAst::Operation {
            operator,
            left,
            right,
        } => (operator, left, right),
    };

    let left_output = Self::execute_ast_node(*left, root_dir, allowed_commands)?;
    match operator {
        Operator::Pipe => {
            Self::execute_ast_node_with_input(*right, &left_output, root_dir, allowed_commands)
        }
        Operator::And => {
            // NOTE(review): non-empty output is used as the success
            // indicator because the command helpers return no exit status.
            // A command that succeeds silently (e.g. `mkdir`) therefore
            // stops the chain; fixing this needs the exit status to be
            // propagated by the helpers.
            let mut result = left_output;
            if !result.trim().is_empty() {
                // In a shell the output of both sides ends up on the pipe,
                // so keep the left output instead of discarding it.
                let right_output = Self::execute_ast_node(*right, root_dir, allowed_commands)?;
                result.push_str(&right_output);
            }
            Ok(result)
        }
        Operator::Semicolon => {
            // Run both sides regardless of the result of the left one.
            let mut result = left_output;
            let right_output = Self::execute_ast_node(*right, root_dir, allowed_commands)?;
            result.push_str(&right_output);
            Ok(result)
        }
    }
}
// Helper function to execute an AST node with input from a previous command
fn execute_ast_node_with_input(
node: ShellAst,
input: &str,
root_dir: &Arc<Path>,
allowed_commands: &mut HashMap<String, bool>,
) -> mlua::Result<String> {
match node {
ShellAst::Command(cmd) => {
// Check if command is allowed
if !allowed_commands.contains_key(&cmd.command) {
allowed_commands.insert(cmd.command.clone(), true);
}
if !allowed_commands[&cmd.command] {
return Err(mlua::Error::runtime(format!(
"Command '{}' is not allowed in this sandbox",
cmd.command
)));
}
// Execute the command with input
let mut command = Command::new(&cmd.command);
command.current_dir(root_dir);
command.args(&cmd.args);
// Configure stdio
command.stdin(Stdio::piped());
command.stdout(Stdio::piped());
command.stderr(Stdio::piped());
let mut child = command.spawn().map_err(|e| {
mlua::Error::runtime(format!("Failed to execute command: {}", e))
})?;
// Write input to stdin
if let Some(mut stdin) = child.stdin.take() {
std::io::Write::write_all(&mut stdin, input.as_bytes()).map_err(|e| {
mlua::Error::runtime(format!("Failed to write to stdin: {}", e))
})?;
// Stdin is closed when it goes out of scope
}
let output = child.wait_with_output().map_err(|e| {
mlua::Error::runtime(format!("Failed to wait for command: {}", e))
})?;
let mut result = String::new();
// Handle stdout
if cmd.stdout_redirect.is_none() {
result.push_str(&String::from_utf8_lossy(&output.stdout));
} else {
// Handle file redirection
let redirect_path = root_dir.join(cmd.stdout_redirect.as_ref().unwrap());
Self::write_to_file(&redirect_path, &output.stdout).map_err(|e| {
mlua::Error::runtime(format!("Failed to redirect stdout: {}", e))
})?;
}
// Handle stderr
if cmd.stderr_redirect.is_none() {
result.push_str(&String::from_utf8_lossy(&output.stderr));
} else {
// Handle file redirection
let redirect_path = root_dir.join(cmd.stderr_redirect.as_ref().unwrap());
Self::write_to_file(&redirect_path, &output.stderr).map_err(|e| {
mlua::Error::runtime(format!("Failed to redirect stderr: {}", e))
})?;
}
Ok(result)
}
ShellAst::Operation { .. } => {
// For complex operations, we'd need to create temporary files for intermediate results
// For simplicity, we'll return an error for now
Err(mlua::Error::runtime(
"Nested operations in pipes are not supported",
))
}
}
}
/// Sandboxed io.open() function in Lua.
fn io_open(
lua: &Lua,