Compare commits

...

5 Commits

3060
Cargo.lock generated

File diff suppressed because it is too large Load Diff

@ -5,4 +5,14 @@ edition = "2024"
[dependencies]
clap = { version = "4.5.48", features = ["derive"] }
polars = {version = "0.53.0", optional = true, features=["lazy"]}
rhai = "1.23.6"
umya-spreadsheet = "2.3.3"
[features]
default = ["csv", "lazyframe"]
csv = []
lazyframe = ["dep:polars"]
[package.metadata.docs.rs]
all-features = true

251
f

File diff suppressed because one or more lines are too long

Binary file not shown.

@ -0,0 +1 @@
edition = "2024"

@ -1,83 +1,94 @@
use clap::{Parser, ValueEnum};
use clap::Parser;
use clap::ValueEnum;
use std::fmt;
#[derive(Clone, Debug, ValueEnum)]
#[derive(Clone, Debug, ValueEnum, Default)]
pub enum FillMergedCells {
None,
Horizontal,
Vertical,
Both
#[default]
Both,
}
impl ToString for FillMergedCells {
fn to_string(&self) -> String {
impl fmt::Display for FillMergedCells {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
FillMergedCells::None => "none".into(),
FillMergedCells::Horizontal => "horizontal".into(),
FillMergedCells::Vertical => "vertical".into(),
FillMergedCells::Both => "both".into(),
FillMergedCells::None => write!(f, "none"),
FillMergedCells::Horizontal => write!(f, "horizontal"),
FillMergedCells::Vertical => write!(f, "vertical"),
FillMergedCells::Both => write!(f, "both"),
}
}
}
#[derive(Clone, Debug, ValueEnum)]
#[derive(Clone, Debug, ValueEnum, Default)]
pub enum IncludeHidden {
None,
Rows,
Columns,
#[default]
Both,
}
impl ToString for IncludeHidden {
fn to_string(&self) -> String {
impl fmt::Display for IncludeHidden {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
IncludeHidden::None => "none".into(),
IncludeHidden::Rows => "rows".into(),
IncludeHidden::Columns => "columns".into(),
IncludeHidden::Both => "both".into(),
IncludeHidden::None => write!(f, "none"),
IncludeHidden::Rows => write!(f, "rows"),
IncludeHidden::Columns => write!(f, "columns"),
IncludeHidden::Both => write!(f, "both"),
}
}
}
#[derive(Clone, Debug, ValueEnum)]
#[derive(Clone, Debug, ValueEnum, Default)]
pub enum TrimSpaces {
End,
Start,
Both,
None
#[default]
None,
}
impl ToString for TrimSpaces {
fn to_string(&self) -> String {
match self {
TrimSpaces::End => "end".into(),
TrimSpaces::Start => "start".into(),
TrimSpaces::Both => "both".into(),
TrimSpaces::None => "none".into(),
}
impl fmt::Display for TrimSpaces {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(
f,
"{}",
match self {
TrimSpaces::End => "end",
TrimSpaces::Start => "start",
TrimSpaces::Both => "both",
TrimSpaces::None => "none",
}
)
}
}
#[derive(Clone, Debug, ValueEnum)]
#[derive(Clone, Debug, ValueEnum, Default)]
pub enum NumberRows {
AsIs,
Sequential,
#[default]
None,
}
impl ToString for NumberRows {
fn to_string(&self) -> String {
match self {
NumberRows::AsIs => "as-is".into(),
NumberRows::Sequential => "sequential".into(),
NumberRows::None => "none".into(),
}
impl fmt::Display for NumberRows {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(
f,
"{}",
match self {
NumberRows::AsIs => "as-is",
NumberRows::Sequential => "sequential",
NumberRows::None => "none",
}
)
}
}
#[derive(Parser, Debug)]
pub struct Arguments {
#[derive(Parser, Debug, Clone)]
pub(crate) struct RawArguments {
/// Path to the xlsx file
#[arg()]
pub file: String,
@ -99,6 +110,9 @@ pub struct Arguments {
/// Choose worksheet
#[arg(short, long, default_value_t = String::from("0"))]
pub worksheet: String,
/// Choose active worksheet at save time in excel
#[arg(short, long, default_value_t = false)]
pub active_worksheet: bool,
/// Trim white spaces at end of cells
#[arg(short, long, default_value_t = TrimSpaces::None)]
pub trim: TrimSpaces,
@ -106,7 +120,7 @@ pub struct Arguments {
#[arg(short, long, default_value_t = NumberRows::None)]
pub number_rows: NumberRows,
/// avoid nth first rows of xlsx file
#[arg(short = 'k', long, default_value_t= 0)]
#[arg(short = 'k', long, default_value_t = 0)]
pub skip_rows: u32,
/// change end of line character
#[arg(short, long, default_value_t = String::from("\n"))]
@ -115,3 +129,138 @@ pub struct Arguments {
#[arg(short = 'R', long)]
pub replace_end_of_line_by: Option<String>,
}
/// Resolved reader options, independent of the command-line parser.
///
/// Values are built from the clap-parsed `RawArguments` through
/// `From<RawArguments>` (the combined `IncludeHidden` / `FillMergedCells`
/// choices are expanded into one boolean per axis), from `Default`, or from a
/// file path through `IntoArgs`. The `XlsxReader` `.with_...()` builder methods
/// modify these fields.
#[derive(Debug, Clone)]
pub struct Arguments {
/// Path to the xlsx file
pub(crate) file: String,
/// List all worksheets in the xlsx file
pub(crate) list_worksheets: bool,
/// Choose worksheet as name or number
pub(crate) worksheet: String,
/// Use the worksheet that was active when the file was last saved
pub(crate) active_worksheet: bool,
/// Include hidden rows in the output
// Replaces the former single `include_hidden: IncludeHidden` field.
pub(crate) include_hidden_rows: bool,
/// Include hidden columns in the output
pub(crate) include_hidden_columns: bool,
/// If merged cells, fill horizontally
// Replaces the former single `fill_merged_cells: FillMergedCells` field.
pub(crate) fill_merged_cells_horizontal: bool,
/// If merged cells, fill vertically
pub(crate) fill_merged_cells_vertical: bool,
/// Side(s) of a cell value on which white space is trimmed (see `TrimSpaces`)
pub(crate) trim: TrimSpaces,
/// Number the rows in the first cell of each line
pub(crate) number_rows: NumberRows,
/// Number of leading rows of the xlsx file to skip
pub(crate) skip_rows: u32,
// --- CSV output specific options ---
/// Separator for output
pub(crate) separator: char,
/// Replace separator char in cells by another
pub(crate) replace_separator_by: Option<String>,
/// Set the end of line string, defaults to "\n"
pub(crate) end_of_line: String,
/// Replacement for end of line string inside cells
pub(crate) replace_end_of_line_by: Option<String>,
}
impl Arguments {
    /// Parses the process command line and returns the resolved options.
    ///
    /// The actual parsing is done by `RawArguments::parse`; the result is then
    /// converted with `From<RawArguments>`.
    pub fn parse() -> Self {
        Self::from(RawArguments::parse())
    }
}
impl Default for Arguments {
fn default() -> Self {
Self {
file: Default::default(),
list_worksheets: Default::default(),
separator: ';',
replace_separator_by: Default::default(),
include_hidden_rows: Default::default(),
include_hidden_columns: Default::default(),
fill_merged_cells_horizontal: Default::default(),
fill_merged_cells_vertical: Default::default(),
worksheet: String::from("0"),
active_worksheet: false,
trim: Default::default(),
number_rows: Default::default(),
skip_rows: Default::default(),
end_of_line: String::from("\n"),
replace_end_of_line_by: Default::default(),
}
}
}
impl From<RawArguments> for Arguments {
    /// Converts parsed command-line arguments into resolved options.
    ///
    /// The two-axis enums are flattened into one boolean per axis; every other
    /// field is moved over unchanged.
    fn from(raw: RawArguments) -> Self {
        let include_hidden_rows = matches!(
            raw.include_hidden,
            IncludeHidden::Rows | IncludeHidden::Both
        );
        let include_hidden_columns = matches!(
            raw.include_hidden,
            IncludeHidden::Columns | IncludeHidden::Both
        );
        let fill_merged_cells_horizontal = matches!(
            raw.fill_merged_cells,
            FillMergedCells::Horizontal | FillMergedCells::Both
        );
        let fill_merged_cells_vertical = matches!(
            raw.fill_merged_cells,
            FillMergedCells::Vertical | FillMergedCells::Both
        );

        Self {
            file: raw.file,
            list_worksheets: raw.list_worksheets,
            worksheet: raw.worksheet,
            active_worksheet: raw.active_worksheet,
            include_hidden_rows,
            include_hidden_columns,
            fill_merged_cells_horizontal,
            fill_merged_cells_vertical,
            trim: raw.trim,
            number_rows: raw.number_rows,
            skip_rows: raw.skip_rows,
            separator: raw.separator,
            replace_separator_by: raw.replace_separator_by,
            end_of_line: raw.end_of_line,
            replace_end_of_line_by: raw.replace_end_of_line_by,
        }
    }
}
/// Conversion into resolved `Arguments`.
///
/// Implemented for file paths (`String`, `&str`) and for `Arguments` itself,
/// so `XlsxReader::new` accepts any of them.
pub trait IntoArgs {
/// Consumes `self` and returns the corresponding `Arguments`.
fn into_args(self) -> Arguments;
}
impl IntoArgs for String {
fn into_args(self) -> Arguments {
Arguments {
file: self,
..Default::default()
}
}
}
impl IntoArgs for &str {
fn into_args(self) -> Arguments {
Arguments {
file: self.to_string(),
..Default::default()
}
}
}
impl IntoArgs for Arguments {
/// Identity conversion: already-built options are returned unchanged.
fn into_args(self) -> Arguments {
self
}
}

@ -1,18 +1,44 @@
use std::fmt;
#[derive(Debug, Clone)]
pub struct Error {
pub msg: String
pub enum Error {
Msg(String),
XlsxError(String),
IoError(String),
PolarsError(String),
}
impl Error {
pub fn new(msg: &str) -> Self {
Error { msg: String::from(msg)}
impl fmt::Display for Error {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Error::Msg(msg) => write!(f, "XlsxToCsvError: {msg}"),
Error::XlsxError(msg) => write!(f, "XlsxError: {msg}"),
Error::IoError(msg) => write!(f, "IoError: {msg}"),
Error::PolarsError(msg) => write!(f, "PolarsError: {msg}"),
}
}
}
impl fmt::Display for Error {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "Error: {}", self.msg)
impl From<umya_spreadsheet::XlsxError> for Error {
    /// Wraps the textual form of a umya-spreadsheet error in `Error::XlsxError`.
    fn from(value: umya_spreadsheet::XlsxError) -> Self {
        let message = value.to_string();
        Self::XlsxError(message)
    }
}
impl From<&str> for Error {
fn from(value: &str) -> Self {
Error::Msg(value.to_string())
}
}
}
impl From<String> for Error {
fn from(value: String) -> Self {
Error::Msg(value)
}
}
impl From<std::io::Error> for Error {
fn from(value: std::io::Error) -> Self {
Error::IoError(value.to_string())
}
}

@ -0,0 +1,10 @@
pub mod arguments;
pub mod error;
pub mod xlsx;
pub mod xlsx_builder;
#[cfg(feature = "csv")]
pub mod xlsx_to_csv;
#[cfg(feature = "lazyframe")]
pub mod xlsx_to_lazyframe;

@ -1,14 +1,13 @@
pub mod arguments;
pub mod error;
pub mod xlsxtocsv;
use arguments::Arguments;
use clap::Parser;
use xlsxtocsv::xlsxtocsv;
fn main() {
let args = Arguments::parse();
if let Err(error) = xlsxtocsv(&args) {
eprintln!("{}", error);
}
pub use xlsxtocsv::{error::Error, xlsx::XlsxReader};
fn main() -> Result<(), Error> {
let lf = XlsxReader::new("noms.xlsx")
.with_active_worksheet()
.to_lazyframe()?;
let df = lf.collect()?;
println!("{df}");
Ok(())
}

@ -1,59 +1,177 @@
use std::cell::RefCell;
use std::io::{BufWriter, Write, stdout};
use std::path::Path;
use umya_spreadsheet::{Cell, Range, Worksheet, reader};
use umya_spreadsheet::{Cell, Range, Spreadsheet, Worksheet, reader};
use crate::arguments::{Arguments, IncludeHidden, NumberRows, TrimSpaces};
use crate::arguments::{Arguments, IntoArgs, NumberRows, TrimSpaces};
use crate::error::Error;
/// A reader for .xlsx files with extended options.
///
/// Options are collected in `args`; the workbook itself is only loaded by
/// `finish`, so `book` and `sheet_index` stay `None` until then.
#[derive(Debug, Clone)]
pub struct XlsxReader {
/// Reader options (file path, worksheet selection, output settings).
pub(crate) args: Arguments,
/// Loaded workbook; `None` until `finish` has read the file.
pub(crate) book: Option<Spreadsheet>,
/// Index of the selected worksheet inside `book`; set by `finish`.
pub(crate) sheet_index: Option<usize>,
/// Cached `(columns, rows)` extent of the selected worksheet, filled by the
/// first call to `get_worksheet_dimensions`.
pub(crate) worksheet_dimensions: RefCell<Option<(u32, u32)>>,
}
impl XlsxReader {
pub fn new(args: impl IntoArgs) -> Self {
let args = args.into_args();
XlsxReader {
args,
book: None,
sheet_index: None,
worksheet_dimensions: RefCell::new(None),
}
}
pub fn list_worksheets(&self) -> Result<Vec<(usize, String)>, Error> {
let book = match &self.book {
Some(book) => book,
None => return Err("Call finish before list_worksheets.".into()),
};
let sheets = book.get_sheet_collection();
let mut res = vec![];
for (i, sheet) in sheets.iter().enumerate() {
res.push((i, String::from(sheet.get_name())));
}
Ok(res)
}
pub(crate) fn finish(&mut self) -> Result<(), Error> {
let book = reader::xlsx::read(Path::new(self.args.file.as_str()))?;
if self.args.active_worksheet && self.args.worksheet == "0" {
let sheetname = book.get_active_sheet().get_name();
self.args.worksheet = String::from(sheetname);
}
let sheet_index = Self::get_worksheet_index(&book, self.args.worksheet.as_str())?;
self.book = Some(book);
self.sheet_index = Some(sheet_index);
Ok(())
}
pub(crate) fn get_worksheet_index(
book: &Spreadsheet,
worksheet_name: &str,
) -> Result<usize, Error> {
let sheet_index = if book.get_sheet_by_name(worksheet_name).is_some() {
// Le nom existe - trouve son index
book.get_sheet_collection()
.iter()
.position(|s| s.get_name() == worksheet_name)
.ok_or("cannot find sheet index")?
} else if let Ok(num) = worksheet_name.parse::<usize>() {
// Pas de nom correspondant, essaye comme numéro
if book.get_sheet(&num).is_none() {
return Err("cannot open sheet".into());
}
num
} else {
return Err("cannot open sheet".into());
};
Ok(sheet_index)
}
pub(crate) fn get_sheet(&self) -> Result<&Worksheet, Error> {
let book = match &self.book {
Some(book) => book,
None => return Err("Call finish before get_sheet.".into()),
};
Ok(book.get_sheet(&self.sheet_index.unwrap()).unwrap())
}
pub(crate) fn get_worksheet_dimensions(&self) -> (u32, u32) {
if self.worksheet_dimensions.borrow().is_none() {
let mut num_cols = 0;
let mut num_rows = 0;
let sheet = self.get_sheet().unwrap();
for cell in sheet.get_cell_collection() {
let value = get_value(cell); //.get_formatted_value();
if value.is_empty() {
continue;
}
let coord = cell.get_coordinate();
let col_num = *coord.get_col_num();
let row_num = *coord.get_row_num();
if col_num > num_cols {
num_cols = col_num;
}
if row_num > num_rows {
num_rows = row_num;
}
}
let mut dim = self.worksheet_dimensions.borrow_mut();
*dim = Some((num_cols, num_rows));
}
let (c, r) = self.worksheet_dimensions.borrow().unwrap();
(c, r)
}
pub(crate) fn get_row(&self, row_num: u32) -> Vec<String> {
let num_cols = self.get_worksheet_dimensions().0 as usize;
let sheet = self.get_sheet().unwrap();
let row = sheet.get_collection_by_row(&row_num);
let mut res = vec![String::new(); num_cols];
for cell in row {
let value = cell.get_formatted_value();
let coord = cell.get_coordinate();
let col = *coord.get_col_num() - 1;
res[col as usize] = value;
}
res
}
}
/// Returns `(index, name)` for every worksheet of `book`, in workbook order.
///
/// The workbook is consumed. The function currently always returns `Ok`.
pub fn list_worksheets(book: Spreadsheet) -> Result<Vec<(usize, String)>, Error> {
    let names = book
        .get_sheet_collection()
        .iter()
        .map(|sheet| String::from(sheet.get_name()))
        .enumerate()
        .collect();
    Ok(names)
}
pub fn xlsxtocsv(args: &Arguments) -> Result<(), Error> {
let book = reader::xlsx::read(Path::new(&args.file))
.expect(format!("Can't open {}", args.file).as_str());
let book = reader::xlsx::read(Path::new(&args.file))?;
if args.list_worksheets {
println!("List of worksheets :");
let mut i = 0;
let sheets = book.get_sheet_collection();
for sheet in sheets {
println!(" {:3}: {}", i, sheet.get_name());
i += 1;
}
return Ok(());
}
let (include_hidden_columns, include_hidden_rows) = match args.include_hidden {
IncludeHidden::None => (false, false),
IncludeHidden::Rows => (false, true),
IncludeHidden::Columns => (true, false),
IncludeHidden::Both => (true, true),
};
// get the sheet from name or number if specified, else the first of the spreadsheet
let sheet = match book.get_sheet_by_name(&args.worksheet) {
Some(sheet) => sheet,
None => {
let sheetnum: u32 = match args.worksheet.parse() {
Ok(sheetnum) => sheetnum,
Err(_) => return Err(Error::new("cannot open sheet")),
Err(_) => return Err("cannot open sheet".into()),
};
let sheet = match book.get_sheet(&(sheetnum as usize)) {
match book.get_sheet(&(sheetnum as usize)) {
Some(sheet) => sheet,
None => return Err(Error::new("cannot open sheet")),
};
sheet
None => return Err("cannot open sheet".into()),
}
}
};
// set the merged cells policy
let (horiz, vert) = match args.fill_merged_cells {
crate::arguments::FillMergedCells::None => (false, false),
crate::arguments::FillMergedCells::Horizontal => (true, false),
crate::arguments::FillMergedCells::Vertical => (false, true),
crate::arguments::FillMergedCells::Both => (true, true),
};
// get all the merged cells
let merged_cells = MergedCells::new(sheet, horiz, vert);
let merged_cells = MergedCells::new(
sheet,
args.fill_merged_cells_horizontal,
args.fill_merged_cells_vertical,
);
// get non-empty value size of the worksheet
let mut num_cols = 0;
@ -62,13 +180,13 @@ pub fn xlsxtocsv(args: &Arguments) -> Result<(), Error> {
for cell in sheet.get_cell_collection() {
let value = get_value(cell); //.get_formatted_value();
if value == "" {
if value.is_empty() {
continue;
}
let coord = cell.get_coordinate();
let col_num = coord.get_col_num().clone();
let row_num = coord.get_row_num().clone();
let col_num = *coord.get_col_num();
let row_num = *coord.get_row_num();
if col_num > num_cols {
num_cols = col_num;
}
@ -79,14 +197,16 @@ pub fn xlsxtocsv(args: &Arguments) -> Result<(), Error> {
let num_cols = num_cols;
let num_rows = num_rows;
// get hidden columns if needed
/*
get hidden columns if needed
*/
let mut hidden_columns: Vec<u32> = Vec::new();
if !include_hidden_columns {
if !args.include_hidden_columns {
for i in 1..=num_cols {
if let Some(dim) = sheet.get_column_dimension_by_number(&i) {
if *dim.get_hidden() {
hidden_columns.push(i);
}
if let Some(dim) = sheet.get_column_dimension_by_number(&i)
&& *dim.get_hidden()
{
hidden_columns.push(i);
}
}
}
@ -99,7 +219,7 @@ pub fn xlsxtocsv(args: &Arguments) -> Result<(), Error> {
empty_row += args.end_of_line.as_str();
if args.skip_rows > num_rows {
return Err(Error::new("Number of rows < number of rows to skip"));
return Err("Number of rows < number of rows to skip".into());
}
let stdout = stdout();
@ -111,7 +231,7 @@ pub fn xlsxtocsv(args: &Arguments) -> Result<(), Error> {
let mut line = String::from("");
// take hidden rows if asked for
if !include_hidden_rows {
if !args.include_hidden_rows {
match sheet.get_row_dimension(&i) {
Some(dim) => {
if *dim.get_hidden() {
@ -120,9 +240,9 @@ pub fn xlsxtocsv(args: &Arguments) -> Result<(), Error> {
}
None => {
seq_row_num += 1;
line += number_row(&args.number_rows, args.separator, seq_row_num, i).as_str();
line += number_row(&args.number_rows, args.separator, seq_row_num, i).as_str();
line += empty_row.as_str();
writer.write(line.as_bytes()).unwrap();
writer.write_all(line.as_bytes()).unwrap();
continue;
}
}
@ -175,34 +295,26 @@ pub fn xlsxtocsv(args: &Arguments) -> Result<(), Error> {
value = value.replace('\r', "").replace('\n', " ");
if let Some(ref replacement) = args.replace_separator_by {
value = value.replace(args.separator, replacement);
} else {
if value.contains(args.separator) {
return Err(Error::new(
format!(
"Cell {} contains separator char, use -r to choose a replacement char",
cell.get_coordinate().get_coordinate()
)
.as_str(),
));
}
} else if value.contains(args.separator) {
return Err(format!(
"Cell {} contains separator char, use -r to choose a replacement char",
cell.get_coordinate().get_coordinate()
)
.into());
}
if let Some(ref replacement) = args.replace_end_of_line_by {
value = value.replace(&args.end_of_line, replacement);
} else {
if value.contains(&args.end_of_line) {
return Err(Error::new(
format!(
"Cell {} contains end of line string, use -R to choose a replacement string",
cell.get_coordinate().get_coordinate()
)
.as_str(),
));
}
} else if value.contains(&args.end_of_line) {
return Err(format!(
"Cell {} contains end of line string, use -R to choose a replacement string",
cell.get_coordinate().get_coordinate()
)
.into());
}
line += value.as_str();
}
line += args.end_of_line.as_str();
writer.write(line.as_bytes()).unwrap();
writer.write_all(line.as_bytes()).unwrap();
}
Ok(())
@ -217,6 +329,7 @@ fn number_row(number_row: &NumberRows, separator: char, seqrownum: u32, i: u32)
}
fn get_value(cell: &Cell) -> String {
//cell.get_formatted_value()
match cell.get_raw_value() {
umya_spreadsheet::CellRawValue::String(val) => String::from(val.clone()),
umya_spreadsheet::CellRawValue::RichText(text) => (*text.get_text()).to_owned(),
@ -257,13 +370,11 @@ impl MergedCells {
&& row >= *range.get_coordinate_start_row().unwrap().get_num()
&& row <= *range.get_coordinate_end_row().unwrap().get_num()
{
let col_start = range.get_coordinate_start_col().unwrap().get_num().clone();
let row_start = range.get_coordinate_start_row().unwrap().get_num().clone();
let col_start = *range.get_coordinate_start_col().unwrap().get_num();
let row_start = *range.get_coordinate_start_row().unwrap().get_num();
if self.fill_horizontal && self.fill_vertical
|| self.fill_horizontal && row == row_start
|| self.fill_vertical && col == col_start
|| col == col_start && row == row_start
if (self.fill_horizontal || col == col_start)
&& (self.fill_vertical || row == row_start)
{
return Some((col_start, row_start));
}

@ -0,0 +1,116 @@
use crate::{
arguments::{FillMergedCells, IncludeHidden, NumberRows, TrimSpaces},
xlsx::XlsxReader,
};
/// Builder-style setters: each method consumes the reader, updates one option
/// in `args` and hands the reader back so calls can be chained.
impl XlsxReader {
    /// Sets the separator char written between cells.
    pub fn with_separator(mut self, separator: char) -> Self {
        self.args.separator = separator;
        self
    }

    /// Sets the text that replaces the separator char found inside cells.
    pub fn with_replace_separator_by(mut self, replacement: String) -> Self {
        self.args.replace_separator_by = Some(replacement);
        self
    }

    /// Chooses whether hidden columns are included.
    pub fn with_include_hidden_columns(mut self, include: bool) -> Self {
        self.args.include_hidden_columns = include;
        self
    }

    /// Chooses whether hidden rows are included.
    pub fn with_include_hidden_rows(mut self, include: bool) -> Self {
        self.args.include_hidden_rows = include;
        self
    }

    /// Sets both hidden-row and hidden-column inclusion from a single choice.
    pub fn with_include_hidden(mut self, include: IncludeHidden) -> Self {
        self.args.include_hidden_columns =
            matches!(include, IncludeHidden::Columns | IncludeHidden::Both);
        self.args.include_hidden_rows =
            matches!(include, IncludeHidden::Rows | IncludeHidden::Both);
        self
    }

    /// Chooses whether merged cells are filled vertically.
    pub fn with_fill_merged_cells_vertical(mut self, merge: bool) -> Self {
        self.args.fill_merged_cells_vertical = merge;
        self
    }

    /// Chooses whether merged cells are filled horizontally.
    pub fn with_fill_merged_cells_horizontal(mut self, merge: bool) -> Self {
        self.args.fill_merged_cells_horizontal = merge;
        self
    }

    /// Sets both merged-cell fill directions from a single choice.
    pub fn with_fill_merged_cells(mut self, mode: FillMergedCells) -> Self {
        self.args.fill_merged_cells_horizontal =
            matches!(mode, FillMergedCells::Horizontal | FillMergedCells::Both);
        self.args.fill_merged_cells_vertical =
            matches!(mode, FillMergedCells::Vertical | FillMergedCells::Both);
        self
    }

    /// Selects the worksheet by name or number.
    pub fn with_worksheet(mut self, worksheet_name: &str) -> Self {
        self.args.worksheet = worksheet_name.to_owned();
        self
    }

    /// Requests the worksheet that was active when the file was last saved.
    pub fn with_active_worksheet(mut self) -> Self {
        self.args.active_worksheet = true;
        self
    }

    /// Adds trimming at the start, keeping end trimming if already requested.
    pub fn with_trim_start(mut self) -> Self {
        let combined = match self.args.trim {
            TrimSpaces::End | TrimSpaces::Both => TrimSpaces::Both,
            TrimSpaces::Start | TrimSpaces::None => TrimSpaces::Start,
        };
        self.args.trim = combined;
        self
    }

    /// Adds trimming at the end, keeping start trimming if already requested.
    pub fn with_trim_end(mut self) -> Self {
        let combined = match self.args.trim {
            TrimSpaces::Start | TrimSpaces::Both => TrimSpaces::Both,
            TrimSpaces::End | TrimSpaces::None => TrimSpaces::End,
        };
        self.args.trim = combined;
        self
    }

    /// Replaces the trimming mode.
    pub fn with_trim(mut self, trim: TrimSpaces) -> Self {
        self.args.trim = trim;
        self
    }

    /// Sets the row numbering mode.
    pub fn with_number_rows(mut self, number_rows: NumberRows) -> Self {
        self.args.number_rows = number_rows;
        self
    }

    /// Sets the number of leading rows to skip.
    pub fn with_skip_rows(mut self, skip: u32) -> Self {
        self.args.skip_rows = skip;
        self
    }

    /// Sets the end-of-line string.
    pub fn with_end_of_line(mut self, eol: String) -> Self {
        self.args.end_of_line = eol;
        self
    }

    /// Sets the text that replaces the end-of-line string found inside cells.
    pub fn with_replace_end_of_line_by(mut self, replacement: String) -> Self {
        self.args.replace_end_of_line_by = Some(replacement);
        self
    }
}

@ -0,0 +1,120 @@
use std::{
fs::File,
io::{BufWriter, Write, stderr, stdout},
};
use crate::{error::Error, xlsx::XlsxReader};
/// Iterator yielding the selected worksheet of an `XlsxReader` as CSV lines.
///
/// Each item is one row joined with `separator` and terminated by
/// `end_of_line`, or an `Error` when a cell contains the separator or the
/// end-of-line string and no replacement was configured.
#[derive(Clone, Debug)]
pub struct XlsxToCsvLines {
/// Reader owning the loaded workbook and the options.
xlsx_reader: XlsxReader,
/// Number of the next worksheet row to emit.
current_row: u32,
/// Last row number to emit, taken from the worksheet dimensions.
num_rows: u32,
/// Separator char as a string, used to join the cells of a row.
separator: String,
/// String appended after every row.
end_of_line: String,
}
impl Iterator for XlsxToCsvLines {
    type Item = Result<String, Error>;

    /// Produces the next worksheet row as a CSV line.
    ///
    /// Returns `None` once `current_row` has passed `num_rows`. A row whose
    /// cells contain the separator or the end-of-line string yields an `Err`
    /// unless the matching replacement option is set; iteration then resumes
    /// with the following row.
    fn next(&mut self) -> Option<Self::Item> {
        if self.current_row > self.num_rows {
            return None;
        }
        let row = self.xlsx_reader.get_row(self.current_row);
        // Advance before any early return: otherwise a row rejected for
        // containing the separator would be returned again on every call.
        self.current_row += 1;

        let args = &self.xlsx_reader.args;
        let row: Vec<String> = match &args.replace_separator_by {
            Some(replacement) => row
                .iter()
                .map(|v| v.replace(args.separator, replacement.as_str()))
                .collect(),
            None => {
                if row.iter().any(|v| v.contains(args.separator)) {
                    return Some(Err(
                        "Some cells contains the separator char. Use a replacement for separator char inside cells.".into(),
                    ));
                }
                row
            }
        };

        let mut line = row.join(self.separator.as_str());
        match &args.replace_end_of_line_by {
            Some(replacement) => line = line.replace(self.end_of_line.as_str(), replacement),
            None => {
                if line.contains(self.end_of_line.as_str()) {
                    return Some(Err("Some cells contains the end of line char. Use a replacement for end of line char inside cells.".into()));
                }
            }
        }
        line.push_str(self.end_of_line.as_str());
        Some(Ok(line))
    }
}
/// Destination of the CSV produced by `XlsxReader::to_csv`.
pub enum Output {
/// Write to the file with this name.
File(String),
/// Write to the standard output stream.
Stdout,
/// Write to the standard error stream.
Stderr,
}
/// Conversion into an `Output`, so `to_csv` accepts either an `Output` value
/// or a file name (`String` / `&str`).
pub trait IntoOutput {
/// Consumes `self` and returns the corresponding `Output`.
fn into_output(self) -> Output;
}
impl IntoOutput for Output {
/// Identity conversion: an `Output` is returned unchanged.
fn into_output(self) -> Output {
self
}
}
impl IntoOutput for String {
/// Interprets the string as the name of the output file.
fn into_output(self) -> Output {
Output::File(self)
}
}
impl IntoOutput for &str {
    /// Interprets the string slice as the name of the output file.
    fn into_output(self) -> Output {
        let filename = self.to_owned();
        Output::File(filename)
    }
}
impl XlsxReader {
    /// Loads the workbook and returns an iterator over its rows as CSV lines.
    ///
    /// Iteration starts at the first worksheet row after the `skip_rows`
    /// leading rows (worksheet rows are numbered from 1) and ends at the last
    /// row holding a non-empty value.
    ///
    /// # Errors
    /// Fails when the workbook cannot be loaded, the worksheet cannot be
    /// found, or `skip_rows` exceeds the number of rows.
    pub fn to_csv_lines(mut self) -> Result<XlsxToCsvLines, Error> {
        self.finish()?;
        let num_rows = self.get_worksheet_dimensions().1;
        let skip_rows = self.args.skip_rows;
        if skip_rows > num_rows {
            return Err("Number of rows < number of rows to skip".into());
        }
        let end_of_line = self.args.end_of_line.clone();
        let separator = String::from(self.args.separator);
        Ok(XlsxToCsvLines {
            xlsx_reader: self,
            // Row numbers are 1-based: row 0 does not exist in a worksheet.
            current_row: skip_rows.saturating_add(1),
            num_rows,
            separator,
            end_of_line,
        })
    }

    /// Writes the worksheet as CSV to `output` (a file name or an `Output`).
    ///
    /// A file destination is created, or truncated if it already exists.
    ///
    /// # Errors
    /// Fails when the workbook cannot be read, a row cannot be converted, or
    /// the destination cannot be created, written or flushed.
    pub fn to_csv(self, output: impl IntoOutput) -> Result<(), Error> {
        // Read the workbook first so that a bad input does not leave an empty
        // output file behind.
        let lines = self.to_csv_lines()?;
        let mut writer: Box<dyn Write> = match output.into_output() {
            // `File::create` opens for writing; `File::open` is read-only.
            Output::File(filename) => Box::new(BufWriter::new(File::create(filename)?)),
            Output::Stdout => Box::new(BufWriter::new(stdout().lock())),
            Output::Stderr => Box::new(BufWriter::new(stderr().lock())),
        };
        for line in lines {
            writer.write_all(line?.as_bytes())?;
        }
        // Flush explicitly: errors raised while flushing on drop are lost.
        writer.flush()?;
        Ok(())
    }
}

@ -0,0 +1,51 @@
use crate::{error::Error, xlsx::XlsxReader};
use polars::prelude::*;
impl From<PolarsError> for Error {
fn from(value: PolarsError) -> Self {
Error::PolarsError(value.to_string())
}
}
impl XlsxReader {
    /// Loads the workbook and returns the selected worksheet as a `LazyFrame`
    /// of string columns named `column_0`, `column_1`, ...
    ///
    /// Rows are read from the first worksheet row after the `skip_rows`
    /// leading rows (worksheet rows are numbered from 1) up to the last row
    /// holding a non-empty value.
    ///
    /// # Errors
    /// Fails when the workbook cannot be loaded, the worksheet cannot be
    /// found, `skip_rows` exceeds the number of rows, or polars rejects the
    /// assembled columns.
    pub fn to_lazyframe(mut self) -> Result<LazyFrame, Error> {
        self.finish()?;
        let (num_cols, num_rows) = self.get_worksheet_dimensions();
        let skip_rows = self.args.skip_rows;
        if skip_rows > num_rows {
            return Err("Number of rows < number of rows to skip".into());
        }

        let mut columns: Vec<Column> = (0..num_cols)
            .map(|i| Column::new(format!("column_{i}").into(), Vec::<String>::new()))
            .collect();

        // Rows are appended in chunks to bound the size of the row buffer.
        const CHUNK_SIZE: usize = 1000;
        let mut chunk: Vec<Vec<String>> = Vec::with_capacity(CHUNK_SIZE);
        // Worksheet rows are 1-based: `offset + 1` spans skip_rows+1..=num_rows.
        for offset in skip_rows..num_rows {
            chunk.push(self.get_row(offset + 1));
            if chunk.len() >= CHUNK_SIZE {
                append_chunk(&mut columns, &chunk);
                chunk.clear();
            }
        }
        if !chunk.is_empty() {
            append_chunk(&mut columns, &chunk);
        }

        let height = (num_rows - skip_rows) as usize;
        // `?` converts through `From<PolarsError>`, keeping the error variant.
        let df = DataFrame::new(height, columns)?;
        Ok(df.lazy())
    }
}
/// Appends the values of `chunk` (a slice of rows) to `columns`, column by
/// column.
///
/// # Panics
/// Panics when a row has fewer entries than there are columns, or when polars
/// refuses the append.
fn append_chunk(columns: &mut [Column], chunk: &[Vec<String>]) {
    for (col_idx, column) in columns.iter_mut().enumerate() {
        // Gather this column's cell from every buffered row.
        let mut chunk_data: Vec<String> = Vec::with_capacity(chunk.len());
        for row in chunk {
            chunk_data.push(row[col_idx].clone());
        }
        let chunk_column = Column::new("temp".into(), chunk_data);
        column.append(&chunk_column).unwrap();
    }
}
Loading…
Cancel
Save