xref: /NovaShell/src/parser.rs (revision cb835e03e4e256c633660e88ed6ed9d7b342ae0d)
1 use std::{
2     collections::HashMap,
3     io::ErrorKind,
4     os::fd::{AsRawFd, FromRawFd},
5     process::{Child, ChildStdout, Stdio},
6     sync::{Arc, Mutex},
7 };
8 
9 use regex::Regex;
10 
11 use crate::env::EnvManager;
12 
/// A lexical unit produced by the lexer.
#[derive(Debug)]
pub enum Token {
    /// An ordinary word: a command name, an option or an argument.
    Word(String),
    /// A special shell symbol.
    Symbol(String),
}
18 
19 #[derive(Debug, Clone)]
20 pub enum CommandType {
21     Simple, // 简单命令
22     Redirect {
23         target: RedirectTarget,
24         mode: RedirectMode,
25     }, // 重定向命令
26     Pipe,   // 管道命令
27 }
28 
/// How a command is connected to the command that follows it.
#[derive(Debug, Clone, PartialEq, Eq, Copy)]
pub enum ConnectType {
    /// Plain connection.
    Simple,
    /// Logical-and connection.
    And,
    /// Logical-or connection.
    Or,
}
35 
36 #[derive(Debug, Clone)]
37 pub struct Command {
38     name: String,
39     args: Vec<String>,
40     cmd_type: CommandType,
41     conn_type: ConnectType,
42 }
43 
44 impl Command {
new( name: &String, args: &[String], cmd_type: CommandType, conn_type: ConnectType, ) -> Command45     pub fn new(
46         name: &String,
47         args: &[String],
48         cmd_type: CommandType,
49         conn_type: ConnectType,
50     ) -> Command {
51         Self {
52             name: name.clone(),
53             args: args.to_vec(),
54             cmd_type,
55             conn_type,
56         }
57     }
58 
execute(&self)59     pub fn execute(&self) {}
60 }
61 
/// Destination of an output redirection.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum RedirectTarget {
    /// Redirect to the file at this path.
    File(String),
    /// Redirect to an already open file descriptor.
    FileDiscriptor(i32),
}

impl RedirectTarget {
    /// Parses the word that follows a redirection operator.
    ///
    /// A word of the form `&N` denotes file descriptor `N`; anything else is
    /// taken verbatim as a file path. Returns `None` when the text after `&`
    /// is not a non-negative integer (a negative number can never name a
    /// descriptor).
    pub fn from_string(str: &str) -> Option<RedirectTarget> {
        match str.strip_prefix('&') {
            Some(number) => number
                .parse::<i32>()
                .ok()
                .filter(|fd| *fd >= 0)
                .map(RedirectTarget::FileDiscriptor),
            None => Some(RedirectTarget::File(str.to_owned())),
        }
    }
}
81 
/// How an output redirection treats its target.
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub enum RedirectMode {
    /// Selected by `>`.
    Overwrite,
    /// Selected by `>>`.
    Append,
}

impl RedirectMode {
    /// Maps a redirection operator to its mode.
    ///
    /// Returns `None` for anything other than `>` or `>>`. The parameter is a
    /// `&str`, so both string slices and `&String` values are accepted.
    pub fn from_string(str: &str) -> Option<RedirectMode> {
        match str {
            ">" => Some(RedirectMode::Overwrite),
            ">>" => Some(RedirectMode::Append),
            _ => None,
        }
    }
}
97 
/// Errors produced while turning a command line into pipelines.
#[derive(Debug, Clone)]
pub enum ParseError {
    /// Input the lexer could not match; carries the unmatched remainder.
    UnexpectedInput(String),
    /// A recognised symbol that is not supported; carries the symbol.
    UnsupportedToken(String),
    /// A symbol found where it is not allowed; carries the symbol.
    UnexpectedToken(String),
}

impl ParseError {
    /// Writes a one-line description of the error to standard error.
    pub fn handle(&self) {
        let message = match self {
            Self::UnexpectedInput(str) => format!("Unexpected input: \"{str}\""),
            Self::UnsupportedToken(str) => format!("Unsupported token: \"{str}\""),
            Self::UnexpectedToken(str) => format!("Unexpected token: \"{str}\""),
        };
        eprintln!("{message}");
    }
}
114 
115 pub struct Parser;
116 
117 impl Parser {
parse_env(str: &str) -> String118     fn parse_env(str: &str) -> String {
119         std::env::var(str).unwrap_or(String::new())
120     }
121 
lexer(input: &str) -> Result<Vec<Token>, ParseError>122     fn lexer(input: &str) -> Result<Vec<Token>, ParseError> {
123         let mut tokens = Vec::new();
124 
125         // 匹配环境变量的正则表达式
126         let env_token = Regex::new(r#"\$\{(\w[\w\d_]*)\}"#).unwrap();
127 
128         // 使用具体的符号组合来匹配
129         let regex_token =
130             Regex::new(r#"([^'";|&$\s]+)|(["'].*?["'])|(&&|\|\||<<|>>|[<>|&;])"#).unwrap();
131 
132         // 预先替换"${}"包围的环境变量
133         let remaining_input = env_token
134             .replace_all(input, |captures: &regex::Captures| {
135                 Self::parse_env(&captures[1])
136             })
137             .into_owned();
138 
139         let mut remaining_input = remaining_input.trim();
140 
141         while !remaining_input.is_empty() {
142             if let Some(mat) = regex_token.find(remaining_input) {
143                 let token_str = mat.as_str();
144                 if token_str.starts_with('"') || token_str.starts_with('\'') {
145                     tokens.push(Token::Word(token_str[1..token_str.len() - 1].to_string()));
146                 } else if token_str.starts_with('$') {
147                     tokens.push(Token::Word(Self::parse_env(&token_str[1..])));
148                 } else if token_str == ">>"
149                     || token_str == ">"
150                     || token_str == "<<"
151                     || token_str == "<"
152                     || token_str == "|"
153                     || token_str == "&"
154                     || token_str == ";"
155                     || token_str == "&&"
156                     || token_str == "||"
157                 {
158                     if token_str == "<" || token_str == "<<" {
159                         return Err(ParseError::UnsupportedToken(token_str.to_string()));
160                     }
161                     tokens.push(Token::Symbol(token_str.to_string()));
162                 } else {
163                     tokens.push(Token::Word(token_str.to_string()));
164                 }
165 
166                 remaining_input = &remaining_input[mat.end()..].trim();
167             } else {
168                 return Err(ParseError::UnexpectedInput(remaining_input.to_string()));
169             }
170         }
171         Ok(tokens)
172     }
173 
parser(tokens: Vec<Token>) -> Result<Vec<Pipeline>, ParseError>174     fn parser(tokens: Vec<Token>) -> Result<Vec<Pipeline>, ParseError> {
175         let mut commands = Vec::new();
176         let mut current_command: Vec<String> = Vec::new();
177         let mut pipelines = Vec::new();
178         let mut redirect_object: (Option<RedirectMode>, Option<RedirectTarget>) = (None, None);
179 
180         for token in tokens {
181             match token {
182                 Token::Word(ref word) => {
183                     if let (Some(_), None) = redirect_object {
184                         redirect_object.1 = RedirectTarget::from_string(word);
185                     } else {
186                         current_command.push(word.to_string());
187                     }
188                 }
189 
190                 Token::Symbol(symbol) => {
191                     match symbol.as_str() {
192                         ">" | ">>" => {
193                             // 重定向符号不能重复出现
194                             if redirect_object.0.is_some() {
195                                 return Err(ParseError::UnexpectedToken(symbol));
196                             } else {
197                                 redirect_object.0 = RedirectMode::from_string(&symbol);
198                             }
199                         }
200                         "|" | "&" | "||" | "&&" | ";" => {
201                             if let Some((name, args)) = current_command.split_first() {
202                                 let mut cmd_type =
203                                     if let (Some(mode), Some(ref target)) = redirect_object {
204                                         CommandType::Redirect {
205                                             target: target.clone(),
206                                             mode,
207                                         }
208                                     } else {
209                                         CommandType::Simple
210                                     };
211 
212                                 let conn_type = match symbol.as_str() {
213                                     "|" => {
214                                         // 重定向优先级高于管道
215                                         if let CommandType::Simple = cmd_type {
216                                             cmd_type = CommandType::Pipe;
217                                         }
218                                         ConnectType::Simple
219                                     }
220                                     "&" | ";" => ConnectType::Simple,
221                                     "||" => ConnectType::Or,
222                                     "&&" => ConnectType::And,
223                                     _ => todo!(),
224                                 };
225 
226                                 commands.push(Command::new(name, args, cmd_type, conn_type));
227                                 current_command.clear();
228 
229                                 if symbol == "&" {
230                                     pipelines.push(Pipeline::new(&commands, true));
231                                     commands.clear();
232                                 }
233                             } else {
234                                 // 这些符号之前必须有word作为命令被分隔,否则这些符号是没有意义的
235                                 return Err(ParseError::UnexpectedToken(symbol));
236                             }
237                         }
238                         _ => todo!(),
239                     }
240                 }
241             }
242         }
243 
244         // 处理最后一个命令
245         if let Some((name, args)) = current_command.split_first() {
246             commands.push(Command::new(
247                 name,
248                 args,
249                 if let (Some(mode), Some(ref target)) = redirect_object {
250                     CommandType::Redirect {
251                         target: target.clone(),
252                         mode,
253                     }
254                 } else {
255                     CommandType::Simple
256                 },
257                 ConnectType::Simple,
258             ));
259         }
260 
261         if !commands.is_empty() {
262             pipelines.push(Pipeline::new(&commands, false));
263         }
264 
265         Ok(pipelines)
266     }
267 
parse(input: &str) -> Result<Vec<Pipeline>, ParseError>268     pub fn parse(input: &str) -> Result<Vec<Pipeline>, ParseError> {
269         // 解析输入并生成token列表
270         let tokens = Self::lexer(input)?;
271         // println!("tokens: {tokens:?}");
272 
273         // 解析 tokens 生成命令流水线
274         Self::parser(tokens)
275     }
276 }
277 
278 #[allow(dead_code)]
279 #[derive(Debug)]
280 pub struct ExecuteError {
281     name: String,
282     err_type: ExecuteErrorType,
283 }
284 
285 impl ExecuteError {
handle(&self, prompt: Option<String>)286     pub fn handle(&self, prompt: Option<String>) {
287         if let Some(prompt) = prompt {
288             eprint!("{}: ", prompt);
289         }
290         eprint!("{}: ", self.name);
291         match &self.err_type {
292             ExecuteErrorType::CommandNotFound => eprintln!("Command not found"),
293             ExecuteErrorType::FileNotFound(file) => eprintln!("Not a file or directory: {}", file),
294             ExecuteErrorType::NotDir(ref path) => eprintln!("Not a Directory: {path}"),
295             ExecuteErrorType::NotFile(ref path) => eprintln!("Not a File: {path}"),
296             ExecuteErrorType::PermissionDenied(ref file) => eprintln!("File open denied: {file}"),
297             ExecuteErrorType::ExecuteFailed => eprintln!("Command execute failed"),
298             ExecuteErrorType::ExitWithCode(exit_code) => {
299                 eprintln!("Command exit with code: {}", exit_code)
300             }
301             ExecuteErrorType::ProcessTerminated => eprintln!("Process terminated"),
302             ExecuteErrorType::FileOpenFailed(file) => {
303                 eprintln!("File open failed: {}", file.clone())
304             }
305             ExecuteErrorType::TooManyArguments => eprintln!("Too many arguments"),
306             ExecuteErrorType::TooFewArguments => eprintln!("Too few arguments"),
307             ExecuteErrorType::InvalidArgument(arg) => eprintln!("Invalid argument: {}", arg),
308         }
309     }
310 }
311 
/// Reasons a command can fail to start or to finish successfully.
#[derive(Debug, Clone)]
#[allow(dead_code)]
pub enum ExecuteErrorType {
    /// The command could not be located.
    CommandNotFound,
    /// Carries the name that is neither a file nor a directory.
    FileNotFound(String),
    /// Carries the path that is not a directory.
    NotDir(String),
    /// Carries the path that is not a file.
    NotFile(String),
    /// Carries the name for which access was denied.
    PermissionDenied(String),
    /// The command could not be executed.
    ExecuteFailed,
    /// The process was terminated instead of exiting normally.
    ProcessTerminated,
    /// The command exited with this non-zero code.
    ExitWithCode(i32),
    /// Carries the path of the file that could not be opened.
    FileOpenFailed(String),
    /// More arguments were supplied than the command accepts.
    TooManyArguments,
    /// Fewer arguments were supplied than the command requires.
    TooFewArguments,
    /// Carries the argument that was rejected.
    InvalidArgument(String),
}
328 
/// Output of one pipeline stage, kept so it can become the next stage's
/// standard input.
pub enum RedirectStdout {
    /// Captured standard output of a spawned child; `None` once it has been
    /// handed out.
    Stdout(Option<ChildStdout>),
    /// Raw file descriptor of a pipe.
    RawPipe(i32),
}

impl RedirectStdout {
    /// Returns the underlying file descriptor.
    ///
    /// For [`RedirectStdout::Stdout`] ownership of the descriptor moves to
    /// the caller, who becomes responsible for closing it. Taking the handle
    /// out and only borrowing its descriptor would close the descriptor as
    /// soon as the temporary handle was dropped, leaving the caller with a
    /// dangling number.
    ///
    /// # Panics
    ///
    /// Panics if the child's stdout has already been taken.
    pub fn as_raw_fd(&mut self) -> i32 {
        match self {
            RedirectStdout::Stdout(child_stdout) => {
                let handle = child_stdout
                    .take()
                    .expect("child stdout has already been taken");
                std::os::fd::IntoRawFd::into_raw_fd(handle)
            }
            RedirectStdout::RawPipe(fd) => *fd,
        }
    }

    /// Converts the stored output into a [`Stdio`] for use as another
    /// process's standard stream.
    ///
    /// # Panics
    ///
    /// Panics if the child's stdout has already been taken.
    pub fn as_std(&mut self) -> Stdio {
        match self {
            RedirectStdout::Stdout(child_stdout) => Stdio::from(
                child_stdout
                    .take()
                    .expect("child stdout has already been taken"),
            ),
            // SAFETY: the returned `Stdio` takes ownership of the descriptor
            // and closes it when dropped, so the caller must have stored an
            // open descriptor that nothing else closes.
            RedirectStdout::RawPipe(fd) => unsafe { Stdio::from_raw_fd(*fd) },
        }
    }
}

impl From<i32> for RedirectStdout {
    /// Wraps a raw pipe descriptor.
    fn from(value: i32) -> Self {
        Self::RawPipe(value)
    }
}

impl From<Option<ChildStdout>> for RedirectStdout {
    /// Wraps a child's captured standard output.
    fn from(value: Option<ChildStdout>) -> Self {
        Self::Stdout(value)
    }
}
361 
362 #[derive(Debug)]
363 pub struct Pipeline {
364     commands: Vec<Command>, // 存储一系列命令
365     backend: bool,
366 }
367 
368 type CommandMap = HashMap<String, fn(&Vec<String>) -> Result<(), ExecuteErrorType>>;
369 
370 impl Pipeline {
new(commands: &Vec<Command>, backend: bool) -> Pipeline371     pub fn new(commands: &Vec<Command>, backend: bool) -> Pipeline {
372         Self {
373             commands: commands.to_vec(),
374             backend,
375         }
376     }
377 
execute(&self, internal_commands: Option<Arc<Mutex<CommandMap>>>) -> Vec<Child>378     pub fn execute(&self, internal_commands: Option<Arc<Mutex<CommandMap>>>) -> Vec<Child> {
379         // 前一个命令是否为管道输出
380         let mut stdout: Option<RedirectStdout> = None;
381         // 提前推断下条命令的布尔值,为None代表下条命令需要运行
382         let mut result_next: Option<bool> = None;
383         let mut children: Vec<Child> = Vec::new();
384         let mut err: Option<ExecuteErrorType> = None;
385 
386         for cmd in self.commands.iter() {
387             if let Some(result) = result_next {
388                 // 如果前面已经推导出本条命令的布尔值,则本条命令不需要执行,并继续推断下条命令
389                 if (result && cmd.conn_type == ConnectType::And)
390                     || (!result && cmd.conn_type == ConnectType::Or)
391                 {
392                     // 如果true遇到||或false遇到&&,则下条命令的布尔值相同
393                     // 如果true遇到&&或false遇到||,继承中断,设为None以执行后续命令
394                     result_next = None;
395                 }
396                 continue;
397             }
398 
399             let mut internal = false;
400             if let Some(ref map) = internal_commands {
401                 let map = map.lock().unwrap();
402                 if let Some(f) = map.get(&cmd.name) {
403                     // 找到内部命令,优先执行,设置标记
404                     internal = true;
405 
406                     // child_fd
407                     let child_fd = if self.backend {
408                         unsafe { libc::fork() }
409                     } else {
410                         0
411                     };
412 
413                     // 为子进程或前台运行
414                     if child_fd == 0 {
415                         let mut old_stdin: Option<i32> = None;
416                         let mut old_stdout: Option<i32> = None;
417 
418                         // 如果上条命令为管道,将标准输入重定向
419                         if let Some(mut redirect_stdout) = stdout {
420                             unsafe {
421                                 old_stdin = Some(libc::dup(libc::STDIN_FILENO));
422                                 libc::dup2(redirect_stdout.as_raw_fd(), libc::STDIN_FILENO);
423                                 stdout = None;
424                             }
425                         }
426 
427                         // 根据命令类型重定向标准输出
428                         match cmd.cmd_type {
429                             CommandType::Simple => {}
430                             CommandType::Pipe => unsafe {
431                                 let mut pipe: [i32; 2] = [0; 2];
432                                 libc::pipe2(pipe.as_mut_ptr(), libc::O_CLOEXEC);
433                                 stdout = Some(RedirectStdout::from(pipe[0]));
434 
435                                 old_stdout = Some(libc::dup(libc::STDOUT_FILENO));
436 
437                                 libc::dup2(pipe[1], libc::STDOUT_FILENO);
438                             },
439                             CommandType::Redirect {
440                                 ref target,
441                                 ref mode,
442                             } => unsafe {
443                                 let mut pipe: [i32; 2] = [0; 2];
444                                 libc::pipe2(pipe.as_mut_ptr(), libc::O_CLOEXEC);
445                                 stdout = Some(RedirectStdout::from(pipe[0]));
446 
447                                 old_stdout = Some(libc::dup(libc::STDOUT_FILENO));
448 
449                                 let append = match mode {
450                                     RedirectMode::Overwrite => false,
451                                     RedirectMode::Append => true,
452                                 };
453 
454                                 match target {
455                                     RedirectTarget::File(file) => {
456                                         match std::fs::OpenOptions::new()
457                                             .write(true)
458                                             .append(append)
459                                             .create(true)
460                                             .open(file)
461                                         {
462                                             Ok(file) => {
463                                                 libc::dup2(file.as_raw_fd(), libc::STDIN_FILENO);
464                                             }
465 
466                                             Err(_) => {
467                                                 err = Some(ExecuteErrorType::FileOpenFailed(
468                                                     file.clone(),
469                                                 ));
470                                             }
471                                         };
472                                     }
473                                     RedirectTarget::FileDiscriptor(fd) => {
474                                         libc::dup2(*fd, libc::STDIN_FILENO);
475                                     }
476                                 }
477                             },
478                         }
479 
480                         // 如果之前没有出错,执行命令
481                         if err.is_none() {
482                             if let Err(err_type) = f(&cmd.args) {
483                                 err = Some(err_type);
484                             }
485                         }
486 
487                         // 还原标准输出
488                         unsafe {
489                             if let Some(old_stdin) = old_stdin {
490                                 libc::dup2(old_stdin, libc::STDIN_FILENO);
491                             }
492 
493                             if let Some(old_stdout) = old_stdout {
494                                 libc::dup2(old_stdout, libc::STDOUT_FILENO);
495                             }
496                         }
497 
498                         if self.backend {
499                             // 当前为后台进程,退出当前进程
500                             std::process::exit(if err.is_none() { 0 } else { 1 });
501                         }
502                     } else if child_fd > 0 {
503                         // 当前进程为父进程
504                         unsafe {
505                             // 设置前台进程
506                             libc::tcsetpgrp(libc::STDIN_FILENO, child_fd);
507 
508                             let mut status = 0;
509                             err = match libc::waitpid(child_fd, &mut status, 0) {
510                                 -1 => Some(ExecuteErrorType::ExecuteFailed),
511                                 _ => None,
512                             };
513 
514                             if status != 0 {
515                                 if libc::WIFEXITED(status) {
516                                     if libc::WEXITSTATUS(status) != 0 {
517                                         err = Some(ExecuteErrorType::ExitWithCode(status));
518                                     }
519                                 } else if libc::WIFSIGNALED(status) {
520                                     err = Some(ExecuteErrorType::ProcessTerminated);
521                                 }
522                             }
523 
524                             // 还原前台进程
525                             libc::tcsetpgrp(libc::STDIN_FILENO, std::process::id() as i32);
526                         }
527                     } else {
528                         err = Some(ExecuteErrorType::ExecuteFailed)
529                     }
530                 }
531             };
532 
533             // 没找到执行内部命令的标记,尝试作为外部命令执行
534             if !internal {
535                 let path = if cmd.name.contains('/') {
536                     // 为路径,获取规范的绝对路径
537                     if let Ok(path) = std::fs::canonicalize(&cmd.name) {
538                         if path.is_file() {
539                             Ok(path)
540                         } else {
541                             // 路径不为文件,返回错误
542                             Err(ExecuteErrorType::NotFile(cmd.name.clone()))
543                         }
544                     } else {
545                         Err(ExecuteErrorType::CommandNotFound)
546                     }
547                 } else {
548                     // 不为路径,从环境变量中查找命令
549                     which::which(&cmd.name).map_err(|_| ExecuteErrorType::CommandNotFound)
550                 };
551 
552                 // println!("path: {:?}", path);
553 
554                 match path {
555                     Err(e) => err = Some(e),
556                     Ok(real_path) => {
557                         let mut child_command = std::process::Command::new(real_path);
558                         child_command.args(cmd.args.clone());
559                         child_command.current_dir(EnvManager::current_dir());
560                         if stdout.is_some() {
561                             child_command.stdin(stdout.take().unwrap().as_std());
562                         }
563 
564                         match &cmd.cmd_type {
565                             CommandType::Simple => {}
566                             CommandType::Redirect { target, mode } => {
567                                 let append = match mode {
568                                     RedirectMode::Overwrite => false,
569                                     RedirectMode::Append => true,
570                                 };
571                                 match target {
572                                     RedirectTarget::File(file) => {
573                                         match std::fs::OpenOptions::new()
574                                             .write(true)
575                                             .append(append)
576                                             .create(true)
577                                             .open(file)
578                                         {
579                                             Ok(file) => {
580                                                 child_command.stdout(file);
581                                             }
582                                             Err(_) => {
583                                                 err = Some(ExecuteErrorType::FileOpenFailed(
584                                                     file.clone(),
585                                                 ));
586                                             }
587                                         };
588                                     }
589                                     RedirectTarget::FileDiscriptor(fd) => {
590                                         child_command.stdout(unsafe { Stdio::from_raw_fd(*fd) });
591                                     }
592                                 }
593                             }
594                             CommandType::Pipe => {
595                                 // 标准输出重定向到管道
596                                 child_command.stdout(Stdio::piped());
597                             }
598                         }
599 
600                         if err.is_none() {
601                             match child_command.spawn() {
602                                 Ok(mut child) => {
603                                     // 如果为管道命令,记录下来
604                                     if let CommandType::Pipe = cmd.cmd_type {
605                                         stdout = Some(RedirectStdout::Stdout(child.stdout.take()));
606                                     }
607 
608                                     // println!("exec command: {child_command:#?}");
609 
610                                     unsafe {
611                                         // 设置前台进程
612                                         libc::tcsetpgrp(libc::STDIN_FILENO, child.id() as i32);
613                                     };
614 
615                                     match child.wait() {
616                                         Ok(exit_status) => match exit_status.code() {
617                                             Some(exit_code) => {
618                                                 if exit_code != 0 {
619                                                     err = Some(ExecuteErrorType::ExitWithCode(
620                                                         exit_code,
621                                                     ));
622                                                 }
623                                             }
624                                             None => err = Some(ExecuteErrorType::ProcessTerminated),
625                                         },
626                                         Err(_) => err = Some(ExecuteErrorType::ExecuteFailed),
627                                     };
628 
629                                     // 还原前台进程
630                                     unsafe {
631                                         libc::tcsetpgrp(
632                                             libc::STDIN_FILENO,
633                                             std::process::id() as i32,
634                                         );
635                                     }
636 
637                                     children.push(child);
638                                 }
639 
640                                 Err(e) => match e.kind() {
641                                     ErrorKind::PermissionDenied => {
642                                         err = Some(ExecuteErrorType::PermissionDenied(
643                                             cmd.name.clone(),
644                                         ))
645                                     }
646                                     _ => eprintln!("Error occurred: {}", e.kind()),
647                                 },
648                             }
649                         }
650                     }
651                 }
652             }
653 
654             // 预计算下条命令的结果
655             result_next = match err {
656                 Some(ref e) => {
657                     ExecuteError {
658                         name: cmd.name.clone(),
659                         err_type: e.clone(),
660                     }
661                     .handle(if internal {
662                         Some("internal command".to_string())
663                     } else {
664                         None
665                     });
666                     if cmd.conn_type == ConnectType::And {
667                         Some(false)
668                     } else {
669                         None
670                     }
671                 }
672                 None => {
673                     if cmd.conn_type == ConnectType::Or {
674                         Some(true)
675                     } else {
676                         None
677                     }
678                 }
679             }
680         }
681 
682         children
683     }
684 
backend(&self) -> bool685     pub fn backend(&self) -> bool {
686         self.backend
687     }
688 }
689