# https://blog.csdn.net/zhouz92/article/details/107179616
from docx import Document

# Walk-through: inspecting the tables of a Word document with python-docx.
# The Document class can create a new Word document or open an existing
# local one.
doc = Document('test03.docx')

# List of all table objects in the document.
tables = doc.tables
print(tables)
# Example output: [<docx.table.Table object at 0x000001957059CD48>]

# Number of tables in the document.
print(len(tables))
# Example output: 1

# Grab the first table object.
table0 = tables[0]

# Style information of the table.
print(table0.style)
# Example output: _TableStyle('Normal Table') id: 190621384

# All cells of the table.
# NOTE(review): `_cells` is an underscore-prefixed (non-public) attribute;
# confirm it is still available in the installed python-docx version.
cells = table0._cells
print(len(cells))  # number of cells in the table
# Example output: 15

# Text of every cell in the table.
cells_string = [cell.text for cell in cells]
print(cells_string)

# Number of columns in the table.
col_num = len(table0.columns)
print(col_num)
# Example output: 3

# Number of rows in the table.
row_num = len(table0.rows)
print(row_num)
# Example output: 5

# A single row object and a single column object.
row0 = table0.rows[0]
col0 = table0.columns[0]

# Text of a row: go through row0.cells to reach the cells of the row object;
# the text is read from each cell.
row0_string = [cell.text for cell in row0.cells]
print(row0_string)

# Text of a column, read the same way through col0.cells.
col0_string = [cell.text for cell in col0.cells]
print(col0_string)

# Original article: https://blog.csdn.net/zhouz92/article/details/107179616
# https://www.jb51.net/article/143936.htm
# https://www.jb51.net/article/167040.htm
# https://zhuanlan.zhihu.com/p/158806667
# https://blog.csdn.net/zhouz92/article/details/106883774
# https://www.cnblogs.com/danhuai/p/11700407.html
import xlrd

# Walk-through: reading an .xls workbook with xlrd.
# Open the workbook (approach 1).
work_book = xlrd.open_workbook('test01.xls')

# Get a sheet object by index.
sheet_1 = work_book.sheet_by_index(0)
print(sheet_1)
# Example output: <xlrd.sheet.Sheet object at 0x000001CE3473C550>

# Get a sheet object by sheet name; the name is case-sensitive.
sheet_2 = work_book.sheet_by_name('Sheet2')
print(sheet_2)

# Read the first sheet row by row.
data_row = [sheet_1.row_values(row) for row in range(sheet_1.nrows)]
print(data_row)

# Read the first sheet column by column.
data_col = [sheet_1.col_values(i) for i in range(sheet_1.ncols)]
print(data_col)

# Read every sheet of test01.xls row by row, keyed by the sheet's position.
all_data = {
    i: [sheet_obj.row_values(row) for row in range(sheet_obj.nrows)]
    for i, sheet_obj in enumerate(work_book.sheets())
}
print(all_data)

# Original article: https://blog.csdn.net/zhouz92/article/details/106883774
import os
import shutil

from openpyxl.reader.excel import load_workbook


def read_excel_with_openpyxl(filename="C:/pubip-2021-03-17.xlsx"):
    """Collect the first-column values of every sheet in a workbook.

    For each sheet, every row with more than three cells contributes its
    first cell value. Progress (sheet names, titles, dimensions, collected
    values) is printed along the way.

    Args:
        filename: Path of the .xlsx workbook to read.

    Returns:
        A dict mapping each sheet name to the list of first-column values
        collected from that sheet.
    """
    wb = load_workbook(filename)
    # `sheetnames` / `wb[name]` replace the deprecated get_sheet_names() /
    # get_sheet_by_name() accessors.
    sheetnames = wb.sheetnames
    print(sheetnames)

    data_dic = {}
    for sname in sheetnames:
        ws = wb[sname]
        print(ws.title)
        print(ws.max_row)
        print(ws.max_column)

        first_cells = []
        for row in ws.values:
            # Only rows with more than three cells are taken into account.
            if len(row) > 3:
                print(row[0])
                first_cells.append(row[0])
        print(first_cells)
        data_dic[sname] = first_cells

    print(data_dic)
    return data_dic


def mv_docx2_project_dir(bugdir, data):
    """Move ``<name>.docx`` files from ``bugdir`` into per-key subdirectories.

    For every key in ``data`` a subdirectory of ``bugdir`` is created if it
    is missing, and each listed document found directly under ``bugdir`` is
    moved into it. Documents that are not in the subdirectory afterwards are
    reported on stdout.

    Args:
        bugdir: Directory that holds the .docx files.
        data: Mapping of subdirectory name to an iterable of document names
            (without the ``.docx`` extension).
    """
    for project, doc_names in data.items():
        bug_project = os.path.join(bugdir, project)
        if not os.path.exists(bug_project):
            os.mkdir(bug_project)

        for docx in doc_names:
            # Empty spreadsheet cells come through as None; there is no file
            # to look for, and concatenating None with ".docx" would raise.
            if docx is None:
                continue
            # Format instead of concatenating so non-string cell values
            # (e.g. numbers) do not raise TypeError.
            docx_filename = f"{docx}.docx"

            bug_file = os.path.join(bugdir, docx_filename)
            if os.path.exists(bug_file):
                shutil.move(bug_file, bug_project)

            newbug_file = os.path.join(bugdir, project, docx_filename)
            if not os.path.exists(newbug_file):
                print('not exist: ', newbug_file)


def doc2docx():
    """Placeholder; not implemented."""
    pass


def read_docx(dirname):
    """Placeholder; not implemented. ``dirname`` is currently unused."""
    pass


def main():
    """Read the default workbook and sort the .docx files into subdirectories."""
    data = read_excel_with_openpyxl()
    bugdir = "c:/2021-03-12-ttt-漏洞列表"
    mv_docx2_project_dir(bugdir, data)


if __name__ == "__main__":
    main()
