use std::io::{BufWriter, Write, stdout}; use std::path::Path; use umya_spreadsheet::{Cell, Range, Worksheet, reader}; use crate::arguments::{Arguments, IncludeHidden, NumberRows, TrimSpaces}; use crate::error::Error; pub fn xlsxtocsv(args: &Arguments) -> Result<(), Error> { let book = reader::xlsx::read(Path::new(&args.file)) .expect(format!("Can't open {}", args.file).as_str()); if args.list_worksheets { println!("List of worksheets :"); let mut i = 0; let sheets = book.get_sheet_collection(); for sheet in sheets { println!(" {:3}: {}", i, sheet.get_name()); i += 1; } return Ok(()); } let (include_hidden_columns, include_hidden_rows) = match args.include_hidden { IncludeHidden::None => (false, false), IncludeHidden::Rows => (false, true), IncludeHidden::Columns => (true, false), IncludeHidden::Both => (true, true), }; // get the sheet from name or number if specified, else the first of the spreadsheet let sheet = match book.get_sheet_by_name(&args.worksheet) { Some(sheet) => sheet, None => { let sheetnum: u32 = match args.worksheet.parse() { Ok(sheetnum) => sheetnum, Err(_) => return Err(Error::new("cannot open sheet")), }; let sheet = match book.get_sheet(&(sheetnum as usize)) { Some(sheet) => sheet, None => return Err(Error::new("cannot open sheet")), }; sheet } }; // set the merged cells policy let (horiz, vert) = match args.fill_merged_cells { crate::arguments::FillMergedCells::None => (false, false), crate::arguments::FillMergedCells::Horizontal => (true, false), crate::arguments::FillMergedCells::Vertical => (false, true), crate::arguments::FillMergedCells::Both => (true, true), }; // get all the merged cells let merged_cells = MergedCells::new(sheet, horiz, vert); // get non-empty value size of the worksheet let mut num_cols = 0; let mut num_rows = 0; for cell in sheet.get_cell_collection() { let value = get_value(cell); //.get_formatted_value(); if value == "" { continue; } let coord = cell.get_coordinate(); let col_num = coord.get_col_num().clone(); let row_num = coord.get_row_num().clone(); if col_num > num_cols { num_cols = col_num; } if row_num > num_rows { num_rows = row_num; } } let num_cols = num_cols; let num_rows = num_rows; // get hidden columns if needed let mut hidden_columns: Vec = Vec::new(); if !include_hidden_columns { for i in 1..=num_cols { if let Some(dim) = sheet.get_column_dimension_by_number(&i) { if *dim.get_hidden() { hidden_columns.push(i); } } } } // create the empty row string let mut empty_row = String::from(""); for _ in 1..num_cols - (hidden_columns.len() as u32) { empty_row.push(args.separator); } empty_row += args.end_of_line.as_str(); if args.skip_rows > num_rows { return Err(Error::new("Number of rows < number of rows to skip")); } let stdout = stdout(); let mut writer = BufWriter::new(stdout.lock()); // for each row... let mut seq_row_num = 0; for i in (args.skip_rows + 1)..=num_rows { let mut line = String::from(""); // take hidden rows if asked for if !include_hidden_rows { match sheet.get_row_dimension(&i) { Some(dim) => { if *dim.get_hidden() { continue; } } None => { seq_row_num += 1; line += number_row(&args.number_rows, args.separator, seq_row_num, i).as_str(); line += empty_row.as_str(); writer.write(line.as_bytes()).unwrap(); continue; } } } // number the row seq_row_num += 1; line += number_row(&args.number_rows, args.separator, seq_row_num, i).as_str(); // for each column in row... let mut first = true; for j in 1..=num_cols { // if the column j has to be hidden, go to the next if hidden_columns.contains(&j) { continue; } if first { first = false; } else { line.push(args.separator); } let cell = match sheet.get_cell((j, i)) { Some(cell) => cell, None => continue, }; // get value from cell depending on merged cells and fill merged policy let cell_coordinate = cell.get_coordinate(); let mut value; if let Some((col, row)) = merged_cells.in_merged_cell( *cell_coordinate.get_col_num(), *cell_coordinate.get_row_num(), ) { value = match sheet.get_cell((col, row)) { Some(merged_cell) => get_value(merged_cell), //.get_formatted_value(), None => String::from(""), } } else { value = get_value(cell); //.get_formatted_value(); } // apply modifications to cells value (trim spaces, replace separator chars, line breaks etc.) value = match args.trim { TrimSpaces::End => String::from(value.trim_end()), TrimSpaces::Start => String::from(value.trim_start()), TrimSpaces::Both => String::from(value.trim()), TrimSpaces::None => value, }; value = value.replace('\r', "").replace('\n', " "); if let Some(ref replacement) = args.replace_separator_by { value = value.replace(args.separator, replacement); } else { if value.contains(args.separator) { return Err(Error::new( format!( "Cell {} contains separator char, use -r to choose a replacement char", cell.get_coordinate().get_coordinate() ) .as_str(), )); } } if let Some(ref replacement) = args.replace_end_of_line_by { value = value.replace(&args.end_of_line, replacement); } else { if value.contains(&args.end_of_line) { return Err(Error::new( format!( "Cell {} contains end of line string, use -R to choose a replacement string", cell.get_coordinate().get_coordinate() ) .as_str(), )); } } line += value.as_str(); } line += args.end_of_line.as_str(); writer.write(line.as_bytes()).unwrap(); } Ok(()) } fn number_row(number_row: &NumberRows, separator: char, seqrownum: u32, i: u32) -> String { match number_row { NumberRows::AsIs => format!("{}{}", i, separator), NumberRows::Sequential => format!("{}{}", seqrownum, separator), NumberRows::None => String::from(""), } } fn get_value(cell: &Cell) -> String { match cell.get_raw_value() { umya_spreadsheet::CellRawValue::String(val) => String::from(val.clone()), umya_spreadsheet::CellRawValue::RichText(text) => (*text.get_text()).to_owned(), umya_spreadsheet::CellRawValue::Lazy(lazy) => (*lazy.clone()).to_owned(), umya_spreadsheet::CellRawValue::Numeric(num) => format!("{}", num), umya_spreadsheet::CellRawValue::Bool(bo) => format!("{}", bo), umya_spreadsheet::CellRawValue::Error(_) => String::from(""), umya_spreadsheet::CellRawValue::Empty => String::from(""), } } struct MergedCells { merged_cells: Vec, fill_horizontal: bool, fill_vertical: bool, } impl MergedCells { pub fn new(sheet: &Worksheet, fill_horizontal: bool, fill_vertical: bool) -> Self { let merged = sheet.get_merge_cells(); let mut merged_cells: Vec = vec![]; for cell in merged { merged_cells.push(cell.clone()); } MergedCells { merged_cells, fill_horizontal, fill_vertical, } } pub fn in_merged_cell(&self, col: u32, row: u32) -> Option<(u32, u32)> { for range in &self.merged_cells { if col >= *range.get_coordinate_start_col().unwrap().get_num() && col <= *range.get_coordinate_end_col().unwrap().get_num() && row >= *range.get_coordinate_start_row().unwrap().get_num() && row <= *range.get_coordinate_end_row().unwrap().get_num() { let col_start = range.get_coordinate_start_col().unwrap().get_num().clone(); let row_start = range.get_coordinate_start_row().unwrap().get_num().clone(); if self.fill_horizontal && self.fill_vertical || self.fill_horizontal && row == row_start || self.fill_vertical && col == col_start || col == col_start && row == row_start { return Some((col_start, row_start)); } } } None } }