renommage xlsxtocsv -> xlsx

refactor
Nicolas Sanchez 4 weeks ago
parent 78d955afcb
commit 3f1feea6cf

@ -1,4 +1,5 @@
use clap::{Parser, ValueEnum};
use clap::Parser;
use clap::ValueEnum;
use std::fmt;
#[derive(Clone, Debug, ValueEnum)]
@ -82,8 +83,8 @@ impl fmt::Display for NumberRows {
}
}
#[derive(Parser, Debug)]
pub struct Arguments {
#[derive(Parser, Debug, Clone)]
pub struct RawArguments {
/// Path to the xlsx file
#[arg()]
pub file: String,
@ -124,3 +125,83 @@ pub struct Arguments {
#[arg(long)]
pub filter: Option<String>,
}
#[derive(Debug, Clone)]
/// Resolved program options, built from [`RawArguments`].
///
/// Besides the values copied over unchanged, the enum-valued options
/// `include_hidden` and `fill_merged_cells` are also expanded into plain
/// boolean flags so that callers do not have to match on the enums.
pub struct Arguments {
/// Path to the xlsx file
pub file: String,
/// List all worksheets
pub list_worksheets: bool,
/// Separator character placed between cells
pub separator: char,
/// Replace separator char in cells by
pub replace_separator_by: Option<String>,
/// Which hidden parts of the sheet (none, rows, columns, both) to include in the output
pub include_hidden: IncludeHidden,
/// `true` when `include_hidden` is `Rows` or `Both`
pub include_hidden_rows: bool,
/// `true` when `include_hidden` is `Columns` or `Both`
pub include_hidden_columns: bool,
/// If merged cells, fill horizontally, vertically, both, or none
pub fill_merged_cells: FillMergedCells,
/// `true` when `fill_merged_cells` is `Horizontal` or `Both`
pub fill_merged_cells_horizontal: bool,
/// `true` when `fill_merged_cells` is `Vertical` or `Both`
pub fill_merged_cells_vertical: bool,
/// Choose worksheet
pub worksheet: String,
/// Which side(s) of each cell value white space is trimmed from
pub trim: TrimSpaces,
/// Number the rows in the first cell of each line
pub number_rows: NumberRows,
/// Number of leading rows of the xlsx file to skip
pub skip_rows: u32,
/// String used as end of line
pub end_of_line: String,
/// Replacement for the end of line string inside cells
pub replace_end_of_line_by: Option<String>,
/// A rhai function to filter lines
pub filter: Option<String>,
}
impl Arguments {
    /// Parses the command line into [`RawArguments`] and converts the result
    /// into the resolved [`Arguments`] form.
    pub fn parse() -> Self {
        Self::from(RawArguments::parse())
    }
}
impl From<RawArguments> for Arguments {
    /// Copies every raw option and expands the `include_hidden` and
    /// `fill_merged_cells` enums into their boolean flags.
    fn from(raw: RawArguments) -> Self {
        // Each flag gets its own exhaustive match, so adding an enum variant
        // later is a compile error here rather than a silent `false`.
        let include_hidden_rows = match raw.include_hidden {
            IncludeHidden::Rows | IncludeHidden::Both => true,
            IncludeHidden::None | IncludeHidden::Columns => false,
        };
        let include_hidden_columns = match raw.include_hidden {
            IncludeHidden::Columns | IncludeHidden::Both => true,
            IncludeHidden::None | IncludeHidden::Rows => false,
        };
        let fill_merged_cells_horizontal = match raw.fill_merged_cells {
            FillMergedCells::Horizontal | FillMergedCells::Both => true,
            FillMergedCells::None | FillMergedCells::Vertical => false,
        };
        let fill_merged_cells_vertical = match raw.fill_merged_cells {
            FillMergedCells::Vertical | FillMergedCells::Both => true,
            FillMergedCells::None | FillMergedCells::Horizontal => false,
        };

        Self {
            file: raw.file,
            list_worksheets: raw.list_worksheets,
            separator: raw.separator,
            replace_separator_by: raw.replace_separator_by,
            include_hidden: raw.include_hidden,
            include_hidden_rows,
            include_hidden_columns,
            fill_merged_cells: raw.fill_merged_cells,
            fill_merged_cells_horizontal,
            fill_merged_cells_vertical,
            worksheet: raw.worksheet,
            trim: raw.trim,
            number_rows: raw.number_rows,
            skip_rows: raw.skip_rows,
            end_of_line: raw.end_of_line,
            replace_end_of_line_by: raw.replace_end_of_line_by,
            filter: raw.filter,
        }
    }
}

@ -20,3 +20,15 @@ impl From<umya_spreadsheet::XlsxError> for Error {
Error::XlsxError(value.to_string())
}
}
impl From<&str> for Error {
fn from(value: &str) -> Self {
Error::Msg(value.to_string())
}
}
impl From<String> for Error {
fn from(value: String) -> Self {
Error::Msg(value)
}
}

@ -1,2 +1,3 @@
pub mod arguments;
pub mod error;
pub mod xlsxtocsv;
pub mod xlsx;

@ -1,8 +1,4 @@
pub mod arguments;
use arguments::Arguments;
use clap::Parser;
use xlsxtocsv::xlsxtocsv;
use xlsxtocsv::{arguments::Arguments, xlsx::xlsxtocsv};
fn main() {
let args = Arguments::parse();

@ -2,9 +2,7 @@ use std::io::{BufWriter, Write, stdout};
use std::path::Path;
use umya_spreadsheet::{Cell, Range, Worksheet, reader};
//use rhai::
use crate::arguments::{Arguments, IncludeHidden, NumberRows, TrimSpaces};
use crate::arguments::{Arguments, NumberRows, TrimSpaces};
use crate::error::Error;
pub fn xlsxtocsv(args: &Arguments) -> Result<(), Error> {
@ -20,37 +18,26 @@ pub fn xlsxtocsv(args: &Arguments) -> Result<(), Error> {
return Ok(());
}
let (include_hidden_columns, include_hidden_rows) = match args.include_hidden {
IncludeHidden::None => (false, false),
IncludeHidden::Rows => (false, true),
IncludeHidden::Columns => (true, false),
IncludeHidden::Both => (true, true),
};
let sheet = match book.get_sheet_by_name(&args.worksheet) {
Some(sheet) => sheet,
None => {
let sheetnum: u32 = match args.worksheet.parse() {
Ok(sheetnum) => sheetnum,
Err(_) => return Err(Error::new("cannot open sheet")),
Err(_) => return Err("cannot open sheet".into()),
};
match book.get_sheet(&(sheetnum as usize)) {
Some(sheet) => sheet,
None => return Err(Error::new("cannot open sheet")),
None => return Err("cannot open sheet".into()),
}
}
};
// set the merged cells policy
let (horiz, vert) = match args.fill_merged_cells {
crate::arguments::FillMergedCells::None => (false, false),
crate::arguments::FillMergedCells::Horizontal => (true, false),
crate::arguments::FillMergedCells::Vertical => (false, true),
crate::arguments::FillMergedCells::Both => (true, true),
};
// get all the merged cells
let merged_cells = MergedCells::new(sheet, horiz, vert);
let merged_cells = MergedCells::new(
sheet,
args.fill_merged_cells_horizontal,
args.fill_merged_cells_vertical,
);
// get non-empty value size of the worksheet
let mut num_cols = 0;
@ -80,7 +67,7 @@ pub fn xlsxtocsv(args: &Arguments) -> Result<(), Error> {
get hidden columns if needed
*/
let mut hidden_columns: Vec<u32> = Vec::new();
if !include_hidden_columns {
if !args.include_hidden_columns {
for i in 1..=num_cols {
if let Some(dim) = sheet.get_column_dimension_by_number(&i)
&& *dim.get_hidden()
@ -98,7 +85,7 @@ pub fn xlsxtocsv(args: &Arguments) -> Result<(), Error> {
empty_row += args.end_of_line.as_str();
if args.skip_rows > num_rows {
return Err(Error::new("Number of rows < number of rows to skip"));
return Err("Number of rows < number of rows to skip".into());
}
let stdout = stdout();
@ -110,7 +97,7 @@ pub fn xlsxtocsv(args: &Arguments) -> Result<(), Error> {
let mut line = String::from("");
// take hidden rows if asked for
if !include_hidden_rows {
if !args.include_hidden_rows {
match sheet.get_row_dimension(&i) {
Some(dim) => {
if *dim.get_hidden() {
@ -175,24 +162,20 @@ pub fn xlsxtocsv(args: &Arguments) -> Result<(), Error> {
if let Some(ref replacement) = args.replace_separator_by {
value = value.replace(args.separator, replacement);
} else if value.contains(args.separator) {
return Err(Error::new(
format!(
"Cell {} contains separator char, use -r to choose a replacement char",
cell.get_coordinate().get_coordinate()
)
.as_str(),
));
return Err(format!(
"Cell {} contains separator char, use -r to choose a replacement char",
cell.get_coordinate().get_coordinate()
)
.into());
}
if let Some(ref replacement) = args.replace_end_of_line_by {
value = value.replace(&args.end_of_line, replacement);
} else if value.contains(&args.end_of_line) {
return Err(Error::new(
format!(
"Cell {} contains end of line string, use -R to choose a replacement string",
cell.get_coordinate().get_coordinate()
)
.as_str(),
));
return Err(format!(
"Cell {} contains end of line string, use -R to choose a replacement string",
cell.get_coordinate().get_coordinate()
)
.into());
}
line += value.as_str();
}

@ -1,373 +0,0 @@
use std::io::{BufWriter, Write, stdout};
use std::path::Path;
use umya_spreadsheet::{Cell, Range, Spreadsheet, Worksheet, reader};
use crate::arguments::{Arguments, FillMergedCells, IncludeHidden, NumberRows, TrimSpaces};
use crate::error::Error;
pub fn xlsxtocsv(args: &Arguments) -> Result<(), Error> {
let book = reader::xlsx::read(Path::new(&args.file))
.unwrap_or_else(|_| panic!("Can't open {}", args.file));
if args.list_worksheets {
list_worksheets(&book);
return Ok(());
}
let sheet = get_sheet(&book, &args.worksheet)?;
let config = ProcessingConfig::from_args(args);
process_sheet(sheet, &config, args)?;
Ok(())
}
// === Configuration ===
/// Boolean processing switches derived from the enum-valued options of
/// `Arguments` (`include_hidden` and `fill_merged_cells`).
struct ProcessingConfig {
/// Keep hidden columns in the output.
include_hidden_columns: bool,
/// Keep hidden rows in the output.
include_hidden_rows: bool,
/// Repeat the value of a merged range across its columns.
fill_horizontal: bool,
/// Repeat the value of a merged range down its rows.
fill_vertical: bool,
}
impl ProcessingConfig {
    /// Expands the `include_hidden` and `fill_merged_cells` options of `args`
    /// into the four boolean switches of the configuration.
    fn from_args(args: &Arguments) -> Self {
        // One exhaustive match per switch: a new enum variant must be
        // classified explicitly before this compiles again.
        let include_hidden_rows = match args.include_hidden {
            IncludeHidden::Rows | IncludeHidden::Both => true,
            IncludeHidden::None | IncludeHidden::Columns => false,
        };
        let include_hidden_columns = match args.include_hidden {
            IncludeHidden::Columns | IncludeHidden::Both => true,
            IncludeHidden::None | IncludeHidden::Rows => false,
        };
        let fill_horizontal = match args.fill_merged_cells {
            FillMergedCells::Horizontal | FillMergedCells::Both => true,
            FillMergedCells::None | FillMergedCells::Vertical => false,
        };
        let fill_vertical = match args.fill_merged_cells {
            FillMergedCells::Vertical | FillMergedCells::Both => true,
            FillMergedCells::None | FillMergedCells::Horizontal => false,
        };

        Self {
            include_hidden_columns,
            include_hidden_rows,
            fill_horizontal,
            fill_vertical,
        }
    }
}
// === Sheet Selection ===
/// Prints a header line followed by the index and name of every worksheet
/// in `book`.
fn list_worksheets(book: &Spreadsheet) {
    println!("List of worksheets :");
    book.get_sheet_collection()
        .iter()
        .enumerate()
        .for_each(|(index, worksheet)| println!(" {:3}: {}", index, worksheet.get_name()));
}
fn get_sheet<'a>(book: &'a Spreadsheet, worksheet_name: &str) -> Result<&'a Worksheet, Error> {
if let Some(sheet) = book.get_sheet_by_name(worksheet_name) {
return Ok(sheet);
}
let sheetnum: u32 = worksheet_name
.parse()
.map_err(|_| Error::new("cannot open sheet"))?;
book.get_sheet(&(sheetnum as usize))
.ok_or_else(|| Error::new("cannot open sheet"))
}
// === Dimensions ===
/// Extent of the part of a worksheet that holds non-empty cell values, as
/// computed by `calculate_dimensions`.
struct SheetDimensions {
/// Highest column number holding a non-empty value (0 when there is none).
num_cols: u32,
/// Highest row number holding a non-empty value (0 when there is none).
num_rows: u32,
}
/// Finds the highest column and row numbers among the cells of `sheet` whose
/// value is not empty; both are 0 when there is no such cell.
fn calculate_dimensions(sheet: &Worksheet) -> SheetDimensions {
    let (num_cols, num_rows) = sheet
        .get_cell_collection()
        .into_iter()
        .filter(|cell| !get_value(cell).is_empty())
        .fold((0u32, 0u32), |(widest, tallest), cell| {
            let position = cell.get_coordinate();
            (
                widest.max(*position.get_col_num()),
                tallest.max(*position.get_row_num()),
            )
        });

    SheetDimensions { num_cols, num_rows }
}
// === Hidden Columns ===
/// Collects the numbers (from 1 to `num_cols`) of the columns flagged as
/// hidden in `sheet`.
///
/// Returns an empty list without inspecting the sheet when `include_hidden`
/// is `true`.
fn get_hidden_columns(sheet: &Worksheet, num_cols: u32, include_hidden: bool) -> Vec<u32> {
    let mut hidden = Vec::new();
    if !include_hidden {
        for column in 1..=num_cols {
            let is_hidden = sheet
                .get_column_dimension_by_number(&column)
                .is_some_and(|dimension| *dimension.get_hidden());
            if is_hidden {
                hidden.push(column);
            }
        }
    }
    hidden
}
// === Row Processing ===
/// Tells whether row `row_num` of `sheet` has a row dimension that is flagged
/// as hidden; rows without a row dimension count as visible.
fn is_row_hidden(sheet: &Worksheet, row_num: u32) -> bool {
    matches!(
        sheet.get_row_dimension(&row_num),
        Some(dimension) if *dimension.get_hidden()
    )
}
fn process_sheet(
sheet: &Worksheet,
config: &ProcessingConfig,
args: &Arguments,
) -> Result<(), Error> {
let dims = calculate_dimensions(sheet);
let hidden_columns = get_hidden_columns(sheet, dims.num_cols, config.include_hidden_columns);
let merged_cells = MergedCells::new(sheet, config.fill_horizontal, config.fill_vertical);
if args.skip_rows > dims.num_rows {
return Err(Error::new("Number of rows < number of rows to skip"));
}
let empty_row = create_empty_row(dims.num_cols, hidden_columns.len() as u32, args);
let stdout = stdout();
let mut writer = BufWriter::new(stdout.lock());
let mut seq_row_num = 0;
for row_num in (args.skip_rows + 1)..=dims.num_rows {
// Skip hidden rows if configured
if !config.include_hidden_rows && is_row_hidden(sheet, row_num) {
continue;
}
// Handle empty rows
if sheet.get_row_dimension(&row_num).is_none() {
seq_row_num += 1;
let line = format!(
"{}{}",
number_row(&args.number_rows, args.separator, seq_row_num, row_num),
empty_row
);
writer.write_all(line.as_bytes()).unwrap();
continue;
}
seq_row_num += 1;
let line = process_row(
sheet,
row_num,
seq_row_num,
dims.num_cols,
&hidden_columns,
&merged_cells,
args,
)?;
writer.write_all(line.as_bytes()).unwrap();
}
Ok(())
}
/// Builds the output line for row `row_num`: the optional row number, the
/// processed values of all non-hidden columns joined by the separator, and
/// the end-of-line string.
///
/// # Errors
///
/// Propagates the error of `process_cell_value` for the first cell that
/// cannot be processed.
fn process_row(
    sheet: &Worksheet,
    row_num: u32,
    seq_row_num: u32,
    num_cols: u32,
    hidden_columns: &[u32],
    merged_cells: &MergedCells,
    args: &Arguments,
) -> Result<String, Error> {
    let mut line = number_row(&args.number_rows, args.separator, seq_row_num, row_num);

    let visible_columns = (1..=num_cols).filter(|column| !hidden_columns.contains(column));
    for (position, col_num) in visible_columns.enumerate() {
        // The separator goes between cells, never before the first one.
        if position > 0 {
            line.push(args.separator);
        }
        let raw = get_cell_value(sheet, col_num, row_num, merged_cells);
        line.push_str(&process_cell_value(raw, args)?);
    }

    line.push_str(&args.end_of_line);
    Ok(line)
}
/// Returns the text to output for the cell at (`col_num`, `row_num`).
///
/// When `merged_cells` resolves the position to the start cell of a merged
/// range, that start cell's value is returned; otherwise the cell's own value
/// is used. A missing cell yields an empty string.
fn get_cell_value(
    sheet: &Worksheet,
    col_num: u32,
    row_num: u32,
    merged_cells: &MergedCells,
) -> String {
    // Resolve the merged range before looking at the cell itself: a position
    // inside a merged range may have no cell record of its own, and it must
    // still be filled from the range's start cell.
    if let Some(anchor) = merged_cells.in_merged_cell(col_num, row_num) {
        return sheet.get_cell(anchor).map(get_value).unwrap_or_default();
    }

    sheet
        .get_cell((col_num, row_num))
        .map(get_value)
        .unwrap_or_default()
}
fn process_cell_value(mut value: String, args: &Arguments) -> Result<String, Error> {
// Trim spaces
value = match args.trim {
TrimSpaces::End => value.trim_end().to_string(),
TrimSpaces::Start => value.trim_start().to_string(),
TrimSpaces::Both => value.trim().to_string(),
TrimSpaces::None => value,
};
// Replace line breaks
value = value.replace('\r', "").replace('\n', " ");
// Handle separator replacement
if let Some(ref replacement) = args.replace_separator_by {
value = value.replace(args.separator, replacement);
} else if value.contains(args.separator) {
return Err(Error::new(
"Cell contains separator char, use -r to choose a replacement char",
));
}
// Handle end of line replacement
if let Some(ref replacement) = args.replace_end_of_line_by {
value = value.replace(&args.end_of_line, replacement);
} else if value.contains(&args.end_of_line) {
return Err(Error::new(
"Cell contains end of line string, use -R to choose a replacement string",
));
}
Ok(value)
}
// === Utility Functions ===
/// Builds the text of a row without values: one separator between each pair
/// of visible columns, followed by the end-of-line string.
fn create_empty_row(num_cols: u32, num_hidden: u32, args: &Arguments) -> String {
    // N visible columns need N - 1 separators; zero columns need none.
    let separator_count = (num_cols - num_hidden).saturating_sub(1) as usize;

    let mut row: String = std::iter::repeat(args.separator)
        .take(separator_count)
        .collect();
    row.push_str(&args.end_of_line);
    row
}
/// Builds the row-number prefix of an output line.
///
/// Depending on `number_row` this is the worksheet row number (`AsIs`) or the
/// running output row number (`Sequential`), each followed by `separator`, or
/// an empty string (`None`).
fn number_row(
    number_row: &NumberRows,
    separator: char,
    seq_row_num: u32,
    actual_row: u32,
) -> String {
    let shown = match number_row {
        NumberRows::AsIs => actual_row,
        NumberRows::Sequential => seq_row_num,
        NumberRows::None => return String::new(),
    };

    let mut prefix = shown.to_string();
    prefix.push(separator);
    prefix
}
/// Returns the raw value of `cell` as text; error cells and empty cells give
/// an empty string.
fn get_value(cell: &Cell) -> String {
    match cell.get_raw_value() {
        umya_spreadsheet::CellRawValue::Error(_) | umya_spreadsheet::CellRawValue::Empty => {
            String::new()
        }
        umya_spreadsheet::CellRawValue::String(text) => text.to_string(),
        umya_spreadsheet::CellRawValue::Lazy(text) => text.to_string(),
        umya_spreadsheet::CellRawValue::RichText(rich) => rich.get_text().to_string(),
        umya_spreadsheet::CellRawValue::Numeric(number) => number.to_string(),
        umya_spreadsheet::CellRawValue::Bool(flag) => flag.to_string(),
    }
}
// === Merged Cells ===
/// Merged ranges of one worksheet together with the fill policy used to
/// decide which positions of a range take the value of its start cell.
struct MergedCells {
/// Copy of the worksheet's merged ranges.
merged_cells: Vec<Range>,
/// Fill positions to the right of the range's first column.
fill_horizontal: bool,
/// Fill positions below the range's first row.
fill_vertical: bool,
}
impl MergedCells {
    /// Copies the merged ranges of `sheet` and records the fill policy.
    pub fn new(sheet: &Worksheet, fill_horizontal: bool, fill_vertical: bool) -> Self {
        Self {
            merged_cells: sheet.get_merge_cells().to_vec(),
            fill_horizontal,
            fill_vertical,
        }
    }

    /// Returns the (column, row) of the start cell whose value should be used
    /// for the position (`col`, `row`), or `None` when the position is not
    /// covered by a merged range or the fill policy excludes it.
    ///
    /// A position outside the range's first column only qualifies with
    /// horizontal fill, and one outside its first row only with vertical fill.
    pub fn in_merged_cell(&self, col: u32, row: u32) -> Option<(u32, u32)> {
        for range in &self.merged_cells {
            if !self.is_in_range(range, col, row) {
                continue;
            }
            let anchor_col = *range.get_coordinate_start_col()?.get_num();
            let anchor_row = *range.get_coordinate_start_row()?.get_num();

            let column_allowed = self.fill_horizontal || col == anchor_col;
            let row_allowed = self.fill_vertical || row == anchor_row;
            if column_allowed && row_allowed {
                return Some((anchor_col, anchor_row));
            }
        }
        None
    }

    /// Tells whether (`col`, `row`) lies inside `range`, bounds included.
    /// A range missing any of its four bounds contains nothing.
    fn is_in_range(&self, range: &Range, col: u32, row: u32) -> bool {
        let (Some(first_col), Some(last_col), Some(first_row), Some(last_row)) = (
            range.get_coordinate_start_col(),
            range.get_coordinate_end_col(),
            range.get_coordinate_start_row(),
            range.get_coordinate_end_row(),
        ) else {
            return false;
        };

        (*first_col.get_num()..=*last_col.get_num()).contains(&col)
            && (*first_row.get_num()..=*last_row.get_num()).contains(&row)
    }
}
Loading…
Cancel
Save