1、读取excel:使用xlrd库
1 2 3 4 5 6 7 8 9 10 11
| from urllib.parse import urlparse import xlrd file = "./网址.xls" workbook = xlrd.open_workbook(filename=file) table = workbook.sheet_by_name("Sheet1") domain_set = set() for i in range(table.nrows): url = table.row_values(i)[0] domain_no_http_www = urlparse(url).netloc.replace("www.", "") domain_set.add(domain_no_http_www) print(domain_set, len(domain_set))
|
注:如果出现 xlrd.biffh.XLRDError: Excel xlsx file; not supported,这是由于当前python中的xlrd版本过高导致的,需要降低版本
1
| pip3 install xlrd==1.2.0
|
2、写入excel库:使用xlwt库
1 2 3 4 5 6 7 8 9 10
| import xlwt book = xlwt.Workbook() sheet1 = book.add_sheet("Sheet1") row = 0 for domain in domain_set: if domain not in db_set: print(domain) sheet1.write(row, 0, domain) row += 1 book.save("./库中没有的domain.xls")
|
3、写入excel:使用XlsxWriter(只能写入.xlsx)
1 2 3 4 5 6 7 8 9 10 11 12
| import xlsxwriter header = ["title","url"] data = [["百度","baidu.com"],["搜狐","souhu.com"]] workbook = xlsxwriter.Workbook("./test.xlsx") sheet1 = workbook.add_worksheet("sheet1") sheet1.activate() sheet1.write_row("A1", header) row = 2 for per_data_list in data: sheet1.write_row(f"A{row}", per_data_list) row += 1 workbook.close()
|
4、合并单元格
1 2 3 4 5 6 7 8 9 10 11
| import pandas as pd from openpyxl import load_workbook df = pd.read_excel("./湖南监管网从业人员.xlsx", sheet_name=0) wb = load_workbook('./湖南监管网从业人员.xlsx') ws = wb[wb.sheetnames[0]] company_dict = df.groupby("公司名称").groups for company, rows_list in company_dict.items(): print(company, rows_list, rows_list[0], rows_list[-1]) ws.merge_cells(start_row=rows_list[0]+2, end_row=rows_list[-1]+2, start_column=1, end_column=1) ws.merge_cells(start_row=rows_list[0]+2, end_row=rows_list[-1]+2, start_column=2, end_column=2) wb.save('./湖南监管网从业人员2.xlsx')
|
5、追加写入多个sheet(不会删除原有的sheet)
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
| import os from openpyxl import Workbook, load_workbook if not os.path.exists("./test.xlsx"): wb_w = Workbook() wb_w.save("./test.xlsx")
wb = load_workbook("./test.xlsx") ws = wb.create_sheet('这是表1') ws.append(["姓名", "年龄", "性别"]) datas = [["张三", 93, "男"], ["小丽", 15, "女"], ["吴日新", 45, "男"]] for row in datas: ws.append(row)
ws = wb.create_sheet('这是表2222') ws.append(["姓名", "年龄", "性别"]) datas = [["张三", 93, "男"], ["小丽", 15, "女"], ["吴日新", 45, "男"]] for row in datas: ws.append(row)
try: del wb["Sheet"] except: pass
wb.save('./test.xlsx')
|