python查询POS后台获取指定时间和状态的订单存入到excel表格中
- #!/usr/bin/env python
- # coding=utf-8
- # 查询pos后台指定时间和状态的订单
-
- import requests
- import re
- from lxml import etree
- from openpyxl import Workbook
- import time,os
- from requests.packages import urllib3
- from datetime import datetime
-
- class POS_crawl(object):
- def __init__(self, username, password):
- # 初始化headers实例
- self.headers = {"User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:56.0) Gecko/20100101 Firefox/56.0",
- "Referer": "https://pos.XXXX.com/login.html"
- }
- # 核实界面
- self.post_url = "https://pos.XXXXX.com/j_spring_security_check"
- # session会话对象
- self.session = requests.session()
- self.username = username
- self.password = password
- self.data = ['序号','订单号','卖家','客户','订单金额','下单时间','订单来源','订单状态','签收时间']
- self.wb = Workbook()
- # 去掉警告
- urllib3.disable_warnings()
-
- # 提取表单登录信息(可以和模拟登陆合并成一个)
- def get_login_info(self):
- data = {
- "j_username": self.username,
- "j_password": self.password
- }
- return data
- # 模拟登录
- def login(self):
- data = self.get_login_info()
- headers = {
- 'User-Agent': 'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0);',
- "Referer": self.post_url,
- "X-Requested-With": "XMLHttpRequest",
- }
- try:
- login_page = self.session.post(self.post_url, data=data, headers=headers,verify=False)
- # print(login_page.text)
- if "loginerror" in login_page.text:
- print("登录失败,错误的手机号码或密码!")
-
- if "</span>首页" in login_page.text:
- print("欢迎您'%s',成功登陆POS管理系统!"%self.username)
- self.order_list()
- except Exception as e:
- print(e)
- # 处理订单
- def order_list(self):
- # 获取当天日期
- file_name_date = time.strftime("%Y-%m-%d", time.localtime())
- # 日期格式正则规则
- patt = re.compile(r"\d{4}-\d{1,2}-\d{1,2}")
- headers = {
- "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:56.0) Gecko/20100101 Firefox/56.0",
- "Referer": "https://pos.XXXX.com/order/tolist.html",
- }
- for state in ['100','40']:
- while True:
- print("\n" + "=" * 75)
- print("开始日期如果不输入则为2015年最早日期,假设为2015-1-1,结束日期如果不输入则为当天日期。")
- print("注意:已完成订单中,如果开始和结束日期都不输入,则把签收日期为空的也会筛选出来。")
- print("=" * 75 + "\n")
- if state == "100":
- # self.wb.sheetnames显示sheet表名字,防止输错日期循环的时候多次创建ws
- if '已完成订单列表'not in self.wb.sheetnames:
- ws = self.wb.active
- ws.title = '已完成订单列表' # 修改第一个sheet表名为销售企业数量列表
- start_date = input("请输入已完成签收的开始日期(格式:2018-11-30):")
- end_date = input("请输入已完成签收的结束日期(格式:2018-11-30):")
- ws.column_dimensions['I'].width = 19
- else:
- if '配送中订单列表'not in self.wb.sheetnames:
- ws = self.wb.create_sheet('配送中订单列表',index=1) # 插入新的工作表,放在1位置
- start_date = input("请输入配送中的下单开始日期(格式:2018-11-30):")
- end_date = input("请输入配送中的下单结束日期(格式:2018-11-30):")
- re_start_date = patt.findall(start_date)
- re_end_date = patt.findall(end_date)
- if start_date !='':
- if len(re_start_date)!=0:
- # 结束日期可以不输入
- if end_date =='':
- break
- else:
- if len(re_end_date) == 0:
- print("ERROR:结束日期格式输入有误,请重新输入!")
- continue
- else:
- break
- else:
- if end_date != '':
- if len(re_end_date) == 0:
- print("ERROR:开始和结束日期格式输入有误,请重新输入!")
- continue
- else:
- print("ERROR:开始日期格式输入有误,请重新输入!")
- continue
- else:
- # re_end_date = patt.findall(end_date)
- if end_date != '':
- if len(re_end_date) == 0:
- print("ERROR:结束日期格式输入有误,请重新输入!")
- continue
- else:
- break
- else:
- break
- # 先写入表头数据
- for head in range(1, len(self.data) + 1):
- _ = ws.cell(row=1, column=head, value=self.data[head - 1])
-
- # 我们将查询日期的区间写在第一行第10列
- if end_date == '':
- end_date = file_name_date
- if start_date == '':
- start_date = '2015-1-1'
- if state == "100":
- select_date = "查询签收日期:"+start_date+"至"+end_date
- ws.cell(row=1, column=10, value=select_date)
- else:
- select_date = "查询下单日期:" + start_date + "至" + end_date
- ws.cell(row=1,column=9,value=select_date)
- # 冻结首行
- ws.freeze_panes = 'A2'
- ws.column_dimensions['A'].width = 6
- ws.column_dimensions['B'].width = 10
- ws.column_dimensions['C'].width = 45
- ws.column_dimensions['D'].width = 33
- ws.column_dimensions['E'].width = 10
- ws.column_dimensions['F'].width = 19
- ws.column_dimensions['G'].width = 12
- self.process_product(start_date,end_date,state,ws,headers)
- self.wb.save('订单数量列表'+file_name_date+'.xlsx')
- address = os.path.abspath('订单数量列表'+file_name_date+'.xlsx')
- print("数据处理完毕,表格已经保存到该路径下:%s"%address)
- self.session.post('https://pos.XXXX.com/j_spring_security_logout',headers=headers,verify=False)
- print("退出后台系统....")
- # 获取数据
- def process_product(self,start_date,end_date,state,ws,headers):
- formdata = {
- # "d-5481-p": "1",
- "to.buyer": "",
- "to.endDate": end_date,
- "to.id": "",
- # 订单状态,-1代表全部
- "to.orderSource": "-1",
- "to.seller": "",
- "to.startDate": start_date,
- # 已完成是100,配送中是40
- "to.status": state,
- }
- time.sleep(1)
-
- # headers = {
- # "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:56.0) Gecko/20100101 Firefox/56.0",
- # "Referer": "https://pos.XXXXX.com/order/tolist.html",
- # }
- get_page = self.session.post("https://pos.XXXXX.com/order/tolist.html", data=formdata, headers=headers,verify=False)
- # 利用xpath规则提取查询结果信息
- get_page_html = etree.HTML(get_page.text)
- # 获取查询结果的数据,计算有多少页,是一个列表
- page_list = get_page_html.xpath('//*[@id="signForm"]/span[1]/text()')
- # with open("557.html", "w", encoding="utf-8")as f:
- # f.write(get_page.text)
- if len(page_list) ==0:
- print("抱歉,没有查询到相应条件的商品!")
- else:
- # 如果只有一条数据会显示One
- if page_list[0].split(" ")[0] != "One":
- total = int(page_list[0].split(" ")[0].replace(",",""))
- else:
- total = 1
- if total % 20 == 0:
- end_page = total // 20
- print('共获取到%d项订单数据,合计%d页' % (total, end_page))
- else:
- end_page = total // 20 + 1
- print('共获取到%d项订单数据,合计%d页' % (total, end_page))
- for page in range(1,end_page+1):
- product_list = []
- order_list_url = "https://pos.XXXX.com/order/tolist.html?d-5481-p="+str(page)+"&to.status=" + state + "&to.orderSource=-1&to.buyer=&to.startDate=" + start_date + "&to.id=&to.seller=&to.endDate=" + end_date
- order_list = self.session.post(order_list_url,headers=headers, verify=False)
- get_order_html = etree.HTML(order_list.text)
- # 获取序号
- number_ = get_order_html.xpath('//*[@id="to"]/tbody/tr/td[1]/text()')
- number_result = []
- [number_result.append(int(number)) for number in number_]
- # 获取订单号
- order_number_ = get_order_html.xpath('//*[@id="to"]/tbody/tr/td[2]/a/text()')
- order_number_result = []
- [order_number_result.append(int(order_number)) for order_number in order_number_]
- # 获取卖家
- order_company_ = get_order_html.xpath('//*[@id="to"]/tbody/tr/td[3]/a/text()')
- order_company_result = []
- [order_company_result.append(order_company.replace("\n", "").replace("\t", "").replace(" ", "").strip()) for
- order_company in order_company_]
- # 获取客户
- order_client_ = get_order_html.xpath('//*[@id="to"]/tbody/tr/td[4]/text()')
- order_client_result =[]
- [order_client_result.append(order_client.replace("\n", "").replace("\t", "")) for order_client in
- order_client_]
- # 获取订单金额
- order_money_ = get_order_html.xpath('//*[@id="to"]/tbody/tr/td[5]/text()')
- order_money_result = []
- [order_money_result.append(float(order_money)) for order_money in order_money_]
- # 获取下单时间,只有这个首尾没有空格
- order_time = get_order_html.xpath('//*[@id="to"]/tbody/tr/td[6]/text()')
- # 订单来源
- order_source_ = get_order_html.xpath('//*[@id="to"]/tbody/tr/td[10]/text()')
- order_source_result = []
- [order_source_result.append(order_source.replace("\n", "").replace("\t", "")) for order_source in
- order_source_]
- # 订单状态
- order_state_ = get_order_html.xpath('//*[@id="to"]/tbody/tr/td[11]/text()')
- order_state_result = []
- [order_state_result.append(order_state.replace("\n", "").replace("\t", "").replace(" ","")) for order_state in
- order_state_]
- product_list.append(number_result)
- product_list.append(order_number_result)
- product_list.append(order_company_result)
- product_list.append(order_client_result)
- product_list.append(order_money_result)
- product_list.append(order_time)
- product_list.append(order_source_result)
- product_list.append(order_state_result)
- # 存储签收时间,首尾无空格
- sign_time_result = []
- # 这样获取的是xpath对象,没加/text是因为这样也能获取到空的值
- sign_time_element = get_order_html.xpath('//*[@id="to"]/tbody/tr/td[7]')
- [sign_time_result.append(sign_time.text) for sign_time in sign_time_element]
- product_list.append(sign_time_result)
- # 已完成才有签收时间
- # if state == '100':
- # sign_time = get_order_html.xpath('//*[@id="to"]/tbody/tr/td[7]/text()')
- # product_list.append(sign_time)
- # else:
- # sign_time = ("*"*(len(number)-1)).split("*")
- # product_list.append(sign_time)
- row_start = [int(k) for k in number_][0]+1
- row_end = [int(k) for k in number_][-1]+1
- print("第%d页数据写入表格中...."%page)
- for col in range(1, len(product_list)+1):
- for row in range(row_start, row_end+1):
- # print("写入第%d行第%d列数据..." % (row, col))
- _ = ws.cell(row=row, column=col, value=(product_list[col - 1][row-2-(page-1)*20]))
- print("此项数据处理完成。")
- if __name__ == "__main__":
- username = input("请输入登录账号:")
- password = input("请输入登录密码:")
- az = POS_crawl(username, password)
- az.login()
- input("请按回车键退出程序。。")
-