first REAL commit

This commit is contained in:
Brennen Raimer
2019-09-12 21:50:24 -04:00
parent ef0728166a
commit 0930c01f19
5 changed files with 213 additions and 2 deletions

17
.gitignore vendored
View File

@@ -102,3 +102,20 @@ venv.bak/
# mypy # mypy
.mypy_cache/ .mypy_cache/
# Created by https://www.gitignore.io/api/visualstudiocode
# Edit at https://www.gitignore.io/?templates=visualstudiocode
### VisualStudioCode ###
.vscode/*
!.vscode/settings.json
!.vscode/tasks.json
!.vscode/launch.json
!.vscode/extensions.json
### VisualStudioCode Patch ###
# Ignore all local history of files
.history
# End of https://www.gitignore.io/api/visualstudiocode
.vscode/settings.json

View File

@@ -1,2 +1,3 @@
# DataFrame-to-Autofit-Xlsx # DataFrame to Autosize Excel
Output your Pandas DataFrame in an xlsx file with columns automatically fit to the data
Output your Pandas DataFrame in an xlsx file with columns automatically fit to the data.

View File

@@ -0,0 +1 @@
from .dataframe_to_autosize_excel import excel_column_width, maximum_character_widths, to_autosize_excel

View File

@@ -0,0 +1,172 @@
from os import PathLike
from os.path import expandvars
from pathlib import Path
from typing import Union, Sequence, List, Tuple
from pandas import DataFrame, ExcelWriter
def to_autosize_excel(df: DataFrame,
outfile: PathLike,
consider_headers: bool = True,
sheet_name: str='Sheet1',
na_rep: str='',
float_format: str=None,
columns: Union[Sequence[str], List[str]]=None,
header: Union[bool, List[str]]=True,
index: bool=True,
index_label: Union[str, Sequence]=None,
startrow: int=0,
startcol: int=0,
inf_rep: str='inf',
verbose: bool=True,
freeze_panes: Tuple[int,int]=None,
excel_date_format: str = "yyyy-mm-dd",
excel_datetime_format: str = "yyyy-mm-dd hh:mm:ss",
mode: str='w')-> Path:
"""
Arguments:
df {DataFrame} -- The data to be output into an xlsx file
outfile {PathLike} -- A pathlike object representing the full path and filename of the output xlsx file
Keyword Arguments:
consider_headers {bool} -- If true, consider the width of the column headers when sizing columns (default: {True})
sheet_name {str} -- The sheet of the workbook to write the data(default: {'Sheet1'})
na_rep {str} -- How null values should be represented in the output (default: {''})
float_format {str} -- Format string for floating point numbers. (default: {None})
columns {Union[Sequence[str], List[str]]} -- If given, only these columns will be written to the file (default: {None})
header {Union[bool, List[str]]} -- [description] (default: {True})
index {bool} -- If true, write the index columns in the output (default: {True})
index_label {Union[str, Sequence]} -- Alternative column headers for index columns. (default: {None})
startrow {int} -- The zero-indexed row of the xlsx file to begin writing data (default: {0})
startcol {int} -- The zero-indexed column of the xlsx file to begin writing data (default: {0})
inf_rep {str} -- How the value of infinity will be represnted in the output (default: {'inf'})
verbose {bool} -- Display more information in the error logs. (default: {True})
freeze_panes {Tuple[int,int]} -- Specifies the one-based bottommost row and rightmost column that is to be frozen. (default: {None})
excel_date_format {str} -- Format string for dates written into Excel files (default: {"yyyy-mm-dd"})
excel_datetime_format {str} -- Format string for datetime objects written into Excel files (default: {"yyyy-mm-dd hh:mm:ss"})
mode {str} -- Must equal 'w' (write) or 'a' (append) (default: {'w'})
Returns:
Path -- A Path object representing the successfully written xlsx output
"""
#we don't want to pass df or outfile as kwargs later
kwargs = {k:v for k,v in zip(list(locals().keys())[3:], list(locals().values())[3:])}
#construct the ExcelWriter, removing its kwargs as they will no longer be needed
writer = ExcelWriter(str(Path(expandvars(outfile))),
engine="xlsxwriter",
date_format=kwargs.pop("excel_date_format"),
datetime_format=kwargs.pop("excel_datetime_format"),
mode=kwargs.pop("mode"))
#This just makes things easier later, trust me. Also df is probably mutable, so not even risking screwing it up!
if kwargs["columns"]:
data = df[list(kwargs["columns"])]
else:
data = df
with writer:
#only kwargs left should be kwargs of df.to_excel
df.to_excel(writer, **kwargs)
wb = writer.book
ws = writer.sheets[kwargs["sheet_name"]]
if isinstance(columns, bool): #Use the DataFrame's existing labels
'''if also going to write index, mash it into the dataframe and just get the
index level names as columns'''
if index:
labels = data.reset_index().columns.to_list()
else:
labels = data.columns.to_list()
else: #Use provided alternative labels
if index and index_label: #Use provided index label(s)
if isinstance(index_label, str):
labels = [index_label] + list(columns)
else:
labels = list(index_label) + list(columns)
elif index and not index_label: #Use existing index name(s) as label(s) with alternative column labels
#a labeless index has a Nonetype name, which converts to the string "None". I prefer the empty string.
labels = [str(name) if name else "" for name in data.index.names] + list(columns)
else:
labels = list(columns)
if index: #much easier to get widths if you just treat the index like regular columns
widths = maximum_character_widths(data.reset_index(), consider_headers, labels)
else:
widths = maximum_character_widths(data, consider_headers, labels)
#size columns using calculated best-fit widths
for column in range(startcol, startcol+len(labels)):
if index:
column_name = data.reset_index().columns[column]
else:
column_name = data.columns[column]
column_width = widths[column_name]
ws.set_column(column, column, excel_column_width(column_width))
#re-write the columns with a custom format that wraps text if columns headers were not considered in sizing of columns
if columns and not consider_headers:
f = wb.add_format({"text_wrap":True, "bold":True, "align":"center", "valign":"vcenter", "border":1})
ws.write_row(startrow, startcol, labels, f)
return Path(writer.path)
def maximum_character_widths(df: DataFrame, consider_headers: bool = True, alternate_headers: Union[list,dict] = None) -> dict:
"""Gets the maximum character width (i.e. the length of the string) of a column in a dataframe. Optionally considers the headers when determining the maximum width
Arguments:
df {DataFrame} -- The input data
Keyword Arguments:
consider_headers {bool} -- If true, consider the column header when determining maximum width. (default: {True})
alternate_headers {Union[list,dict]} -- If present, is equivalent to consider_headers = True, except these values will be considered instead of column labels. (default: {None})
Raises:
ValueError: Raised if the number of alternative column headers does not match the number of columns in the dataframe
TypeError: Raised if alternative headers is not a list or dictionary
Returns:
dict -- A dictionary of character widths by column header
"""
widths = {}
if isinstance(alternate_headers, list):
if len(alternate_headers) != len(df.columns):
raise ValueError("The number of labels must equal the number of columns in the dataframe")
else:
headers = {k:v for k,v in zip(df.columns, alternate_headers)}
elif isinstance(alternate_headers, dict):
if len(alternate_headers.keys()) != len(df.columns):
raise ValueError("The number of labels must equal the number of columns in the dataframe")
else:
headers = alternate_headers
elif alternate_headers == None and consider_headers:
headers = {v:v for v in df.columns}
else:
raise TypeError("Alternative headers must be a list or dictionary")
for key,value in headers.items():
if consider_headers:
widths[key] = max(len(value), df[key].astype(str).str.len().max())
else:
widths[key] = df[key].astype(str).str.len().max()
return widths
def excel_column_width(charwidth:int, fontsize:float=11) -> float:
"""Converts a character width to a an Excel column width based on the font size
Arguments:
charwidth {int} -- The number of characters in the cell value to fit the column to
Keyword Arguments:
fontsize {float} -- The font size of the cell to fit. (default: {11})
Returns:
float -- The value of a close-enough Excel column width
"""
#emperically derived from observation of excel. At best this is an approximation that errs on the side of slightly oversized
return charwidth * round(0.118775 * fontsize, 2)

20
setup.py Normal file
View File

@@ -0,0 +1,20 @@
setup_args={
'name':'dataframe to autosize excel',
'version':'1.0',
'description':'Output pandas DataFrames into Excel Xlsx files with autofitted columns',
'author':'Brennen Raimer',
'url':'https://github.com/norweeg'
}
try:
from setuptools import setup, find_packages
except ImportError:
from distutils.core import setup
setup_args['packages'] = ["dataframe_to_autosize_excel"]
else:
setup_args['packages'] = find_packages(exclude = ['contrib', 'docs', 'tests','reports','examples'])
setup_args['project_urls'] = {'Source':'https://github.com/norweeg/DataFrame-to-Autofit-Xlsx'}
setup_args['install_requires'] = ['pandas', 'xlsxwriter']
setup_args['zip_safe'] = False
finally:
setup(**setup_args)