diff --git a/src/arguments.rs b/src/arguments.rs index e2ad5fa..8a7b609 100644 --- a/src/arguments.rs +++ b/src/arguments.rs @@ -1,4 +1,5 @@ -use clap::{Parser, ValueEnum}; +use clap::Parser; +use clap::ValueEnum; use std::fmt; #[derive(Clone, Debug, ValueEnum)] @@ -82,8 +83,8 @@ impl fmt::Display for NumberRows { } } -#[derive(Parser, Debug)] -pub struct Arguments { +#[derive(Parser, Debug, Clone)] +pub struct RawArguments { /// Path to the xlsx file #[arg()] pub file: String, @@ -124,3 +125,83 @@ pub struct Arguments { #[arg(long)] pub filter: Option, } + +#[derive(Debug, Clone)] +pub struct Arguments { + /// Path to the xlsx file + pub file: String, + /// List all worksheets + pub list_worksheets: bool, + /// Separator + pub separator: char, + /// Replace separator char in cells by + pub replace_separator_by: Option, + /// include hidden lines to output + pub include_hidden: IncludeHidden, + pub include_hidden_rows: bool, + pub include_hidden_columns: bool, + /// If merged cells, fill horizontally, vertically, both, or none + pub fill_merged_cells: FillMergedCells, + pub fill_merged_cells_horizontal: bool, + pub fill_merged_cells_vertical: bool, + /// Chosse worksheet + pub worksheet: String, + /// Trim white spaces at end of cells + pub trim: TrimSpaces, + /// number the rows in first cell of each line + pub number_rows: NumberRows, + /// avoid nth first rows of xlsx file + pub skip_rows: u32, + /// change end of line character + pub end_of_line: String, + /// replacement for end of line character inside cells + pub replace_end_of_line_by: Option, + /// a rhai function to filter lines + pub filter: Option, +} + +impl Arguments { + pub fn parse() -> Self { + let raw = RawArguments::parse(); + raw.into() + } +} + +impl From for Arguments { + fn from(raw: RawArguments) -> Self { + let (include_hidden_columns, include_hidden_rows) = match raw.include_hidden { + IncludeHidden::None => (false, false), + IncludeHidden::Rows => (false, true), + IncludeHidden::Columns => (true, false), + IncludeHidden::Both => (true, true), + }; + + let (fill_merged_cells_horizontal, fill_merged_cells_vertical) = match raw.fill_merged_cells + { + FillMergedCells::None => (false, false), + FillMergedCells::Horizontal => (true, false), + FillMergedCells::Vertical => (false, true), + FillMergedCells::Both => (true, true), + }; + + Arguments { + file: raw.file, + list_worksheets: raw.list_worksheets, + separator: raw.separator, + replace_separator_by: raw.replace_separator_by, + include_hidden: raw.include_hidden, + include_hidden_columns, + include_hidden_rows, + fill_merged_cells: raw.fill_merged_cells, + fill_merged_cells_horizontal, + fill_merged_cells_vertical, + worksheet: raw.worksheet, + trim: raw.trim, + number_rows: raw.number_rows, + skip_rows: raw.skip_rows, + end_of_line: raw.end_of_line, + replace_end_of_line_by: raw.replace_end_of_line_by, + filter: raw.filter, + } + } +} diff --git a/src/error.rs b/src/error.rs index 7b5c6af..51700ab 100644 --- a/src/error.rs +++ b/src/error.rs @@ -20,3 +20,15 @@ impl From for Error { Error::XlsxError(value.to_string()) } } + +impl From<&str> for Error { + fn from(value: &str) -> Self { + Error::Msg(value.to_string()) + } +} + +impl From for Error { + fn from(value: String) -> Self { + Error::Msg(value) + } +} diff --git a/src/lib.rs b/src/lib.rs index 28498e8..8d16fa1 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,2 +1,3 @@ +pub mod arguments; pub mod error; -pub mod xlsxtocsv; +pub mod xlsx; diff --git a/src/main.rs b/src/main.rs index c6fa906..7426b6e 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,8 +1,4 @@ -pub mod arguments; - -use arguments::Arguments; -use clap::Parser; -use xlsxtocsv::xlsxtocsv; +use xlsxtocsv::{arguments::Arguments, xlsx::xlsxtocsv}; fn main() { let args = Arguments::parse(); diff --git a/src/xlsxtocsv.rs b/src/xlsx.rs similarity index 81% rename from src/xlsxtocsv.rs rename to src/xlsx.rs index 04cb592..b6eea9b 100644 --- a/src/xlsxtocsv.rs +++ b/src/xlsx.rs @@ -2,9 +2,7 @@ use std::io::{BufWriter, Write, stdout}; use std::path::Path; use umya_spreadsheet::{Cell, Range, Worksheet, reader}; -//use rhai:: - -use crate::arguments::{Arguments, IncludeHidden, NumberRows, TrimSpaces}; +use crate::arguments::{Arguments, NumberRows, TrimSpaces}; use crate::error::Error; pub fn xlsxtocsv(args: &Arguments) -> Result<(), Error> { @@ -20,37 +18,26 @@ pub fn xlsxtocsv(args: &Arguments) -> Result<(), Error> { return Ok(()); } - let (include_hidden_columns, include_hidden_rows) = match args.include_hidden { - IncludeHidden::None => (false, false), - IncludeHidden::Rows => (false, true), - IncludeHidden::Columns => (true, false), - IncludeHidden::Both => (true, true), - }; - let sheet = match book.get_sheet_by_name(&args.worksheet) { Some(sheet) => sheet, None => { let sheetnum: u32 = match args.worksheet.parse() { Ok(sheetnum) => sheetnum, - Err(_) => return Err(Error::new("cannot open sheet")), + Err(_) => return Err("cannot open sheet".into()), }; match book.get_sheet(&(sheetnum as usize)) { Some(sheet) => sheet, - None => return Err(Error::new("cannot open sheet")), + None => return Err("cannot open sheet".into()), } } }; - // set the merged cells policy - let (horiz, vert) = match args.fill_merged_cells { - crate::arguments::FillMergedCells::None => (false, false), - crate::arguments::FillMergedCells::Horizontal => (true, false), - crate::arguments::FillMergedCells::Vertical => (false, true), - crate::arguments::FillMergedCells::Both => (true, true), - }; - // get all the merged cells - let merged_cells = MergedCells::new(sheet, horiz, vert); + let merged_cells = MergedCells::new( + sheet, + args.fill_merged_cells_horizontal, + args.fill_merged_cells_vertical, + ); // get non-empty value size of the worksheet let mut num_cols = 0; @@ -80,7 +67,7 @@ pub fn xlsxtocsv(args: &Arguments) -> Result<(), Error> { get hidden columns if needed */ let mut hidden_columns: Vec = Vec::new(); - if !include_hidden_columns { + if !args.include_hidden_columns { for i in 1..=num_cols { if let Some(dim) = sheet.get_column_dimension_by_number(&i) && *dim.get_hidden() @@ -98,7 +85,7 @@ pub fn xlsxtocsv(args: &Arguments) -> Result<(), Error> { empty_row += args.end_of_line.as_str(); if args.skip_rows > num_rows { - return Err(Error::new("Number of rows < number of rows to skip")); + return Err("Number of rows < number of rows to skip".into()); } let stdout = stdout(); @@ -110,7 +97,7 @@ pub fn xlsxtocsv(args: &Arguments) -> Result<(), Error> { let mut line = String::from(""); // take hidden rows if asked for - if !include_hidden_rows { + if !args.include_hidden_rows { match sheet.get_row_dimension(&i) { Some(dim) => { if *dim.get_hidden() { @@ -175,24 +162,20 @@ pub fn xlsxtocsv(args: &Arguments) -> Result<(), Error> { if let Some(ref replacement) = args.replace_separator_by { value = value.replace(args.separator, replacement); } else if value.contains(args.separator) { - return Err(Error::new( - format!( - "Cell {} contains separator char, use -r to choose a replacement char", - cell.get_coordinate().get_coordinate() - ) - .as_str(), - )); + return Err(format!( + "Cell {} contains separator char, use -r to choose a replacement char", + cell.get_coordinate().get_coordinate() + ) + .into()); } if let Some(ref replacement) = args.replace_end_of_line_by { value = value.replace(&args.end_of_line, replacement); } else if value.contains(&args.end_of_line) { - return Err(Error::new( - format!( - "Cell {} contains end of line string, use -R to choose a replacement string", - cell.get_coordinate().get_coordinate() - ) - .as_str(), - )); + return Err(format!( + "Cell {} contains end of line string, use -R to choose a replacement string", + cell.get_coordinate().get_coordinate() + ) + .into()); } line += value.as_str(); } diff --git a/src/xlsxtocsv_refactored.rs b/src/xlsxtocsv_refactored.rs deleted file mode 100644 index 716435e..0000000 --- a/src/xlsxtocsv_refactored.rs +++ /dev/null @@ -1,373 +0,0 @@ -use std::io::{BufWriter, Write, stdout}; -use std::path::Path; -use umya_spreadsheet::{Cell, Range, Spreadsheet, Worksheet, reader}; - -use crate::arguments::{Arguments, FillMergedCells, IncludeHidden, NumberRows, TrimSpaces}; -use crate::error::Error; - -pub fn xlsxtocsv(args: &Arguments) -> Result<(), Error> { - let book = reader::xlsx::read(Path::new(&args.file)) - .unwrap_or_else(|_| panic!("Can't open {}", args.file)); - - if args.list_worksheets { - list_worksheets(&book); - return Ok(()); - } - - let sheet = get_sheet(&book, &args.worksheet)?; - let config = ProcessingConfig::from_args(args); - - process_sheet(sheet, &config, args)?; - - Ok(()) -} - -// === Configuration === - -struct ProcessingConfig { - include_hidden_columns: bool, - include_hidden_rows: bool, - fill_horizontal: bool, - fill_vertical: bool, -} - -impl ProcessingConfig { - fn from_args(args: &Arguments) -> Self { - let (include_hidden_columns, include_hidden_rows) = match args.include_hidden { - IncludeHidden::None => (false, false), - IncludeHidden::Rows => (false, true), - IncludeHidden::Columns => (true, false), - IncludeHidden::Both => (true, true), - }; - - let (fill_horizontal, fill_vertical) = match args.fill_merged_cells { - FillMergedCells::None => (false, false), - FillMergedCells::Horizontal => (true, false), - FillMergedCells::Vertical => (false, true), - FillMergedCells::Both => (true, true), - }; - - Self { - include_hidden_columns, - include_hidden_rows, - fill_horizontal, - fill_vertical, - } - } -} - -// === Sheet Selection === - -fn list_worksheets(book: &Spreadsheet) { - println!("List of worksheets :"); - let sheets = book.get_sheet_collection(); - for (i, sheet) in sheets.iter().enumerate() { - println!(" {:3}: {}", i, sheet.get_name()); - } -} - -fn get_sheet<'a>(book: &'a Spreadsheet, worksheet_name: &str) -> Result<&'a Worksheet, Error> { - if let Some(sheet) = book.get_sheet_by_name(worksheet_name) { - return Ok(sheet); - } - - let sheetnum: u32 = worksheet_name - .parse() - .map_err(|_| Error::new("cannot open sheet"))?; - - book.get_sheet(&(sheetnum as usize)) - .ok_or_else(|| Error::new("cannot open sheet")) -} - -// === Dimensions === - -struct SheetDimensions { - num_cols: u32, - num_rows: u32, -} - -fn calculate_dimensions(sheet: &Worksheet) -> SheetDimensions { - let mut num_cols = 0; - let mut num_rows = 0; - - for cell in sheet.get_cell_collection() { - let value = get_value(cell); - if value.is_empty() { - continue; - } - - let coord = cell.get_coordinate(); - let col_num = *coord.get_col_num(); - let row_num = *coord.get_row_num(); - - if col_num > num_cols { - num_cols = col_num; - } - if row_num > num_rows { - num_rows = row_num; - } - } - - SheetDimensions { num_cols, num_rows } -} - -// === Hidden Columns === - -fn get_hidden_columns(sheet: &Worksheet, num_cols: u32, include_hidden: bool) -> Vec { - if include_hidden { - return Vec::new(); - } - - (1..=num_cols) - .filter(|&i| { - sheet - .get_column_dimension_by_number(&i) - .map(|dim| *dim.get_hidden()) - .unwrap_or(false) - }) - .collect() -} - -// === Row Processing === - -fn is_row_hidden(sheet: &Worksheet, row_num: u32) -> bool { - sheet - .get_row_dimension(&row_num) - .map(|dim| *dim.get_hidden()) - .unwrap_or(false) -} - -fn process_sheet( - sheet: &Worksheet, - config: &ProcessingConfig, - args: &Arguments, -) -> Result<(), Error> { - let dims = calculate_dimensions(sheet); - let hidden_columns = get_hidden_columns(sheet, dims.num_cols, config.include_hidden_columns); - let merged_cells = MergedCells::new(sheet, config.fill_horizontal, config.fill_vertical); - - if args.skip_rows > dims.num_rows { - return Err(Error::new("Number of rows < number of rows to skip")); - } - - let empty_row = create_empty_row(dims.num_cols, hidden_columns.len() as u32, args); - - let stdout = stdout(); - let mut writer = BufWriter::new(stdout.lock()); - let mut seq_row_num = 0; - - for row_num in (args.skip_rows + 1)..=dims.num_rows { - // Skip hidden rows if configured - if !config.include_hidden_rows && is_row_hidden(sheet, row_num) { - continue; - } - - // Handle empty rows - if sheet.get_row_dimension(&row_num).is_none() { - seq_row_num += 1; - let line = format!( - "{}{}", - number_row(&args.number_rows, args.separator, seq_row_num, row_num), - empty_row - ); - writer.write_all(line.as_bytes()).unwrap(); - continue; - } - - seq_row_num += 1; - let line = process_row( - sheet, - row_num, - seq_row_num, - dims.num_cols, - &hidden_columns, - &merged_cells, - args, - )?; - - writer.write_all(line.as_bytes()).unwrap(); - } - - Ok(()) -} - -fn process_row( - sheet: &Worksheet, - row_num: u32, - seq_row_num: u32, - num_cols: u32, - hidden_columns: &[u32], - merged_cells: &MergedCells, - args: &Arguments, -) -> Result { - let mut line = number_row(&args.number_rows, args.separator, seq_row_num, row_num); - - let mut first = true; - for col_num in 1..=num_cols { - if hidden_columns.contains(&col_num) { - continue; - } - - if !first { - line.push(args.separator); - } - first = false; - - let cell_value = get_cell_value(sheet, col_num, row_num, merged_cells); - let processed_value = process_cell_value(cell_value, args)?; - line += &processed_value; - } - - line += args.end_of_line.as_str(); - Ok(line) -} - -fn get_cell_value( - sheet: &Worksheet, - col_num: u32, - row_num: u32, - merged_cells: &MergedCells, -) -> String { - let cell = match sheet.get_cell((col_num, row_num)) { - Some(cell) => cell, - None => return String::new(), - }; - - // Check if cell is in a merged range - if let Some((merge_col, merge_row)) = merged_cells.in_merged_cell(col_num, row_num) { - sheet - .get_cell((merge_col, merge_row)) - .map(get_value) - .unwrap_or_default() - } else { - get_value(cell) - } -} - -fn process_cell_value(mut value: String, args: &Arguments) -> Result { - // Trim spaces - value = match args.trim { - TrimSpaces::End => value.trim_end().to_string(), - TrimSpaces::Start => value.trim_start().to_string(), - TrimSpaces::Both => value.trim().to_string(), - TrimSpaces::None => value, - }; - - // Replace line breaks - value = value.replace('\r', "").replace('\n', " "); - - // Handle separator replacement - if let Some(ref replacement) = args.replace_separator_by { - value = value.replace(args.separator, replacement); - } else if value.contains(args.separator) { - return Err(Error::new( - "Cell contains separator char, use -r to choose a replacement char", - )); - } - - // Handle end of line replacement - if let Some(ref replacement) = args.replace_end_of_line_by { - value = value.replace(&args.end_of_line, replacement); - } else if value.contains(&args.end_of_line) { - return Err(Error::new( - "Cell contains end of line string, use -R to choose a replacement string", - )); - } - - Ok(value) -} - -// === Utility Functions === - -fn create_empty_row(num_cols: u32, num_hidden: u32, args: &Arguments) -> String { - let visible_cols = num_cols - num_hidden; - let mut row = String::new(); - - for _ in 1..visible_cols { - row.push(args.separator); - } - row += args.end_of_line.as_str(); - row -} - -fn number_row( - number_row: &NumberRows, - separator: char, - seq_row_num: u32, - actual_row: u32, -) -> String { - match number_row { - NumberRows::AsIs => format!("{}{}", actual_row, separator), - NumberRows::Sequential => format!("{}{}", seq_row_num, separator), - NumberRows::None => String::from(""), - } -} - -fn get_value(cell: &Cell) -> String { - match cell.get_raw_value() { - umya_spreadsheet::CellRawValue::String(val) => String::from(val.clone()), - umya_spreadsheet::CellRawValue::RichText(text) => (*text.get_text()).to_owned(), - umya_spreadsheet::CellRawValue::Lazy(lazy) => (*lazy.clone()).to_owned(), - umya_spreadsheet::CellRawValue::Numeric(num) => num.to_string(), - umya_spreadsheet::CellRawValue::Bool(bo) => bo.to_string(), - umya_spreadsheet::CellRawValue::Error(_) | umya_spreadsheet::CellRawValue::Empty => { - String::new() - } - } -} - -// === Merged Cells === - -struct MergedCells { - merged_cells: Vec, - fill_horizontal: bool, - fill_vertical: bool, -} - -impl MergedCells { - pub fn new(sheet: &Worksheet, fill_horizontal: bool, fill_vertical: bool) -> Self { - let merged_cells = sheet.get_merge_cells().iter().cloned().collect(); - - Self { - merged_cells, - fill_horizontal, - fill_vertical, - } - } - - pub fn in_merged_cell(&self, col: u32, row: u32) -> Option<(u32, u32)> { - for range in &self.merged_cells { - if self.is_in_range(range, col, row) { - let col_start = *range.get_coordinate_start_col()?.get_num(); - let row_start = *range.get_coordinate_start_row()?.get_num(); - - if (self.fill_horizontal || col == col_start) - && (self.fill_vertical || row == row_start) - { - return Some((col_start, row_start)); - } - } - } - None - } - - fn is_in_range(&self, range: &Range, col: u32, row: u32) -> bool { - let Some(start_col) = range.get_coordinate_start_col() else { - return false; - }; - let Some(end_col) = range.get_coordinate_end_col() else { - return false; - }; - let Some(start_row) = range.get_coordinate_start_row() else { - return false; - }; - let Some(end_row) = range.get_coordinate_end_row() else { - return false; - }; - - col >= *start_col.get_num() - && col <= *end_col.get_num() - && row >= *start_row.get_num() - && row <= *end_row.get_num() - } -}