first REAL commit
This commit is contained in:
17
.gitignore
vendored
17
.gitignore
vendored
@@ -102,3 +102,20 @@ venv.bak/
|
||||
|
||||
# mypy
|
||||
.mypy_cache/
|
||||
|
||||
# Created by https://www.gitignore.io/api/visualstudiocode
|
||||
# Edit at https://www.gitignore.io/?templates=visualstudiocode
|
||||
|
||||
### VisualStudioCode ###
|
||||
.vscode/*
|
||||
!.vscode/settings.json
|
||||
!.vscode/tasks.json
|
||||
!.vscode/launch.json
|
||||
!.vscode/extensions.json
|
||||
|
||||
### VisualStudioCode Patch ###
|
||||
# Ignore all local history of files
|
||||
.history
|
||||
|
||||
# End of https://www.gitignore.io/api/visualstudiocode
|
||||
.vscode/settings.json
|
||||
|
||||
@@ -1,2 +1,3 @@
|
||||
# DataFrame-to-Autofit-Xlsx
|
||||
Output your Pandas DataFrame in an xlsx file with columns automatically fit to the data
|
||||
# DataFrame to Autosize Excel
|
||||
|
||||
Output your Pandas DataFrame in an xlsx file with columns automatically fit to the data.
|
||||
|
||||
1
dataframe_to_autosize_excel/__init__.py
Normal file
1
dataframe_to_autosize_excel/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
from .dataframe_to_autosize_excel import excel_column_width, maximum_character_widths, to_autosize_excel
|
||||
172
dataframe_to_autosize_excel/dataframe_to_autosize_excel.py
Normal file
172
dataframe_to_autosize_excel/dataframe_to_autosize_excel.py
Normal file
@@ -0,0 +1,172 @@
|
||||
from os import PathLike
|
||||
from os.path import expandvars
|
||||
from pathlib import Path
|
||||
from typing import Union, Sequence, List, Tuple
|
||||
|
||||
from pandas import DataFrame, ExcelWriter
|
||||
|
||||
|
||||
def to_autosize_excel(df: DataFrame,
                      outfile: PathLike,
                      consider_headers: bool = True,
                      sheet_name: str = 'Sheet1',
                      na_rep: str = '',
                      float_format: str = None,
                      columns: Union[Sequence[str], List[str]] = None,
                      header: Union[bool, List[str]] = True,
                      index: bool = True,
                      index_label: Union[str, Sequence] = None,
                      startrow: int = 0,
                      startcol: int = 0,
                      inf_rep: str = 'inf',
                      verbose: bool = True,
                      freeze_panes: Tuple[int, int] = None,
                      excel_date_format: str = "yyyy-mm-dd",
                      excel_datetime_format: str = "yyyy-mm-dd hh:mm:ss",
                      mode: str = 'w') -> Path:
    """Write a DataFrame to an xlsx file and size every written column to fit its contents.

    Arguments:
        df {DataFrame} -- The data to be output into an xlsx file
        outfile {PathLike} -- A pathlike object representing the full path and filename of the output xlsx file.
            Environment variables in the path are expanded.

    Keyword Arguments:
        consider_headers {bool} -- If true, consider the width of the column headers when sizing columns.
            If false, the header row is re-written with a text-wrapping format instead (default: {True})
        sheet_name {str} -- The sheet of the workbook to write the data (default: {'Sheet1'})
        na_rep {str} -- How null values should be represented in the output (default: {''})
        float_format {str} -- Format string for floating point numbers. (default: {None})
        columns {Union[Sequence[str], List[str]]} -- If given, only these columns will be written to the file (default: {None})
        header {Union[bool, List[str]]} -- If a bool, whether to write the column labels. If a list of strings,
            alternative labels to write in place of the column labels (default: {True})
        index {bool} -- If true, write the index columns in the output (default: {True})
        index_label {Union[str, Sequence]} -- Alternative column headers for index columns. (default: {None})
        startrow {int} -- The zero-indexed row of the xlsx file to begin writing data (default: {0})
        startcol {int} -- The zero-indexed column of the xlsx file to begin writing data (default: {0})
        inf_rep {str} -- How the value of infinity will be represented in the output (default: {'inf'})
        verbose {bool} -- Display more information in the error logs. Only forwarded to pandas when False (default: {True})
        freeze_panes {Tuple[int,int]} -- Specifies the one-based bottommost row and rightmost column that is to be frozen. (default: {None})
        excel_date_format {str} -- Format string for dates written into Excel files (default: {"yyyy-mm-dd"})
        excel_datetime_format {str} -- Format string for datetime objects written into Excel files (default: {"yyyy-mm-dd hh:mm:ss"})
        mode {str} -- Passed to ExcelWriter. NOTE(review): pandas rejects append mode ('a') for the
            xlsxwriter engine used here, so in practice only 'w' works (default: {'w'})

    Returns:
        Path -- A Path object representing the successfully written xlsx output
    """
    # Resolve the destination once so it can be returned without relying on
    # writer attributes that differ between pandas versions.
    output_path = Path(expandvars(outfile))

    # Explicit pass-through of the DataFrame.to_excel keyword arguments.
    to_excel_kwargs = {
        "sheet_name": sheet_name,
        "na_rep": na_rep,
        "float_format": float_format,
        "columns": columns,
        "header": header,
        "index": index,
        "index_label": index_label,
        "startrow": startrow,
        "startcol": startcol,
        "inf_rep": inf_rep,
        "freeze_panes": freeze_panes,
    }
    # True is to_excel's own default where the argument exists, and newer pandas
    # no longer accepts it at all, so only forward a non-default value.
    if not verbose:
        to_excel_kwargs["verbose"] = verbose

    # Work on the subset of columns that is actually written; df itself is never modified.
    if columns is not None and len(columns):
        data = df[list(columns)]
    else:
        data = df

    # Treating the index like ordinary leading columns makes measuring widths uniform.
    frame = data.reset_index() if index else data

    # Labels of the index columns as they appear in the header row.
    if not index:
        index_labels = []
    elif index_label:
        if isinstance(index_label, str):
            index_labels = [index_label]
        else:
            index_labels = list(index_label)
    else:
        # An unlabelled index level has the name None; show it as an empty cell.
        index_labels = ["" if name is None else name for name in data.index.names]

    # Labels of the data columns: the frame's own labels unless alternatives were given.
    if isinstance(header, bool):
        column_labels = list(data.columns)
    else:
        column_labels = list(header)

    labels = index_labels + column_labels

    # When no header row is written there is nothing to take into account.
    measure_headers = consider_headers and header is not False

    with ExcelWriter(str(output_path),
                     engine="xlsxwriter",
                     date_format=excel_date_format,
                     datetime_format=excel_datetime_format,
                     mode=mode) as writer:
        df.to_excel(writer, **to_excel_kwargs)
        wb = writer.book
        ws = writer.sheets[sheet_name]

        widths = maximum_character_widths(frame, measure_headers, [str(label) for label in labels])

        # Size columns using the calculated best-fit widths. The worksheet column is
        # offset by startcol; the frame column is not.
        for offset, column_name in enumerate(frame.columns):
            worksheet_column = startcol + offset
            ws.set_column(worksheet_column, worksheet_column, excel_column_width(widths[column_name]))

        # If headers were not considered in sizing, re-write the header row with a
        # format that wraps text. Only done for a single header row.
        if header is not False and not consider_headers and data.columns.nlevels == 1:
            wrapped = wb.add_format({"text_wrap": True, "bold": True, "align": "center", "valign": "vcenter", "border": 1})
            ws.write_row(startrow, startcol, labels, wrapped)

    return output_path
|
||||
|
||||
def maximum_character_widths(df: DataFrame, consider_headers: bool = True, alternate_headers: Union[list, dict] = None) -> dict:
    """Gets the maximum character width (i.e. the length of the string form) of each column in a dataframe. Optionally considers the headers when determining the maximum width

    Arguments:
        df {DataFrame} -- The input data

    Keyword Arguments:
        consider_headers {bool} -- If true, consider the column header when determining maximum width. (default: {True})
        alternate_headers {Union[list,dict]} -- If present, these values are measured instead of the column labels
            when consider_headers is true. A list is matched to the columns by position; a dictionary maps
            column label to alternative header. (default: {None})

    Raises:
        ValueError: Raised if the number of alternative column headers does not match the number of columns in the dataframe
        TypeError: Raised if alternative headers is given but is not a list, tuple or dictionary

    Returns:
        dict -- A dictionary of character widths by column label
    """
    if alternate_headers is None:
        # No alternatives: each column is its own header.
        headers = {label: label for label in df.columns}
    elif isinstance(alternate_headers, (list, tuple, dict)):
        if len(alternate_headers) != len(df.columns):
            raise ValueError("The number of labels must equal the number of columns in the dataframe")
        if isinstance(alternate_headers, dict):
            headers = alternate_headers
        else:
            headers = dict(zip(df.columns, alternate_headers))
    else:
        raise TypeError("Alternative headers must be a list or dictionary")

    widths = {}
    for key, value in headers.items():
        # Drop missing lengths so an empty (or all-missing) column measures as 0 instead of NaN.
        lengths = df[key].astype(str).str.len().dropna()
        data_width = int(lengths.max()) if not lengths.empty else 0
        if consider_headers:
            # str() so that non-string labels (e.g. integer column names) can be measured.
            widths[key] = max(len(str(value)), data_width)
        else:
            widths[key] = data_width

    return widths
|
||||
|
||||
def excel_column_width(charwidth: int, fontsize: float = 11) -> float:
    """Converts a character width to an Excel column width based on the font size

    Arguments:
        charwidth {int} -- The number of characters in the cell value to fit the column to

    Keyword Arguments:
        fontsize {float} -- The font size of the cell to fit. (default: {11})

    Returns:
        float -- The value of a close-enough Excel column width
    """
    # Empirically derived from observation of Excel. At best this is an
    # approximation that errs on the side of slightly oversized.
    width_per_character = round(0.118775 * fontsize, 2)
    return charwidth * width_per_character
|
||||
20
setup.py
Normal file
20
setup.py
Normal file
@@ -0,0 +1,20 @@
|
||||
# Distribution metadata shared by the setuptools and the distutils code paths.
setup_args = {
    'name': 'dataframe to autosize excel',
    'version': '1.0',
    'description': 'Output pandas DataFrames into Excel Xlsx files with autofitted columns',
    'author': 'Brennen Raimer',
    'url': 'https://github.com/norweeg',
}

try:
    from setuptools import setup, find_packages
except ImportError:
    # setuptools is unavailable: fall back to distutils and name the package explicitly.
    from distutils.core import setup
    setup_args['packages'] = ["dataframe_to_autosize_excel"]
else:
    # These options are only added when setuptools was imported successfully.
    setup_args['packages'] = find_packages(exclude=['contrib', 'docs', 'tests', 'reports', 'examples'])
    setup_args['project_urls'] = {'Source': 'https://github.com/norweeg/DataFrame-to-Autofit-Xlsx'}
    setup_args['install_requires'] = ['pandas', 'xlsxwriter']
    setup_args['zip_safe'] = False

# Called after the try statement rather than in a `finally` clause, so that an
# unexpected error above aborts the build instead of running setup() with
# incomplete arguments while the exception is still propagating.
setup(**setup_args)
|
||||
Reference in New Issue
Block a user