# Source code for croco.xTable
# -*- coding: utf-8 -*-
"""
Functions to read and write xTable data.
"""
import pandas as pd
if __name__ == '__main__' or __name__ =='xTable':
import HelperFunctions as hf
else:
from . import HelperFunctions as hf
def _join_list_by_semicolon(entry):
if isinstance(entry, list):
return ';'.join([str(x) for x in entry])
else:
return entry
def Write(xtable, outpath, col_order=None, compact=False):
    """
    Write an xtable data structure to file (in xlsx format).

    List-valued cells in object-dtype columns are joined into
    semicolon-separated strings before writing. The conversion is performed
    on a copy, so the dataframe passed in by the caller is not modified.

    Args:
        xtable: data table structure (pandas DataFrame)
        outpath: path to write the file to (w/o file extension!)
        col_order (list): List of xTable column titles that are used to sort and compress the resulting datatable
        compact (bool): Whether to compact the xTable to only those columns listed in col_order
    """
    # work on a copy: the assignment below would otherwise replace the list
    # cells of the caller's dataframe with strings as a hidden side effect
    xtable = xtable.copy()

    # select only object dtypes as lists will anyways be found only in those
    # and element-wise mapping struggles with nullable int64 dtype
    is_object = xtable.dtypes == 'object'

    # column-wise Series.map is the element-wise equivalent of the deprecated
    # DataFrame.applymap and is available in old and new pandas versions
    xtable.loc[:, is_object] = xtable.loc[:, is_object].apply(
        lambda column: column.map(_join_list_by_semicolon))

    xtable = hf.order_columns(xtable, col_order, compact)

    # '.xlsx' is appended here, so outpath must not carry an extension
    xtable.to_excel(hf.compatible_path(outpath) + '.xlsx',
                    index=False)
def _to_numeric_if_possible(column):
    """Return column converted by pd.to_numeric, or unchanged if that fails."""
    try:
        return pd.to_numeric(column)
    except (ValueError, TypeError):
        # same outcome as the deprecated errors='ignore': keep the input as is
        return column


def _cells_to_lists(frame, item_type):
    """Apply hf.convert_to_list_of(cell, item_type) to every cell of frame."""
    # column-wise Series.map is the element-wise equivalent of the deprecated
    # DataFrame.applymap and is available in old and new pandas versions
    return frame.apply(
        lambda column: column.map(
            lambda cell: hf.convert_to_list_of(cell, item_type)))


def Read(xTable_files, col_order=None, compact=False):
    """
    Read an xTable data structure from file

    Args:
        xTable_files: path to the xtable file, or a list/tuple of such paths
        col_order (list): List of xTable column titles that are used to sort and compress the resulting datatable
        compact (bool): Whether to compact the xTable to only those columns listed in col_order
    Returns:
        xtable: xTable dataframe object
    Raises:
        Exception: if one of the files cannot be opened; the underlying
            error is attached as ``__cause__``
    """
    # convert to list if the input is only a single path
    if not isinstance(xTable_files, (list, tuple)):
        xTable_files = [xTable_files]

    allData = list()

    for file in xTable_files:
        try:
            s = pd.read_excel(hf.compatible_path(file))
        except Exception as err:
            # keep the original error chained so the real reason (missing
            # file, missing excel engine, ...) stays visible in the traceback
            raise Exception('[xTable Read] Failed opening file: {}'.format(file)) from err
        allData.append(s)

    xtable = pd.concat(allData, sort=False)

    # convert only those columns to lists where lists are expected
    xtable[['modmass1', 'modmass2']] = _cells_to_lists(
        xtable[['modmass1', 'modmass2']], float)
    xtable[['modpos1', 'modpos2']] = _cells_to_lists(
        xtable[['modpos1', 'modpos2']], int)
    xtable[['mod1', 'mod2']] = _cells_to_lists(
        xtable[['mod1', 'mod2']], str)

    xtable = hf.order_columns(xtable, col_order, compact)

    # columns that cannot be parsed as numbers are left untouched
    xtable = xtable.apply(_to_numeric_if_possible)

    return xtable
if __name__ == '__main__':
    # manual smoke test: load a local example xTable when run as a script
    example_file = r'C:\Users\User\Documents\02_experiments\05_croco_dataset\002_20180425\crosslink_search\pLink2_reports_xtable.xlsx'
    xtable = Read(example_file)