from PIL import Image
import requests
import json
import cv2
import pyzbar.pyzbar as pyzbar
import fitz
import sys
import io
import pandas as pd
import numpy as np
import time

# The input spreadsheet path must be given as the first command-line
# argument. Without it there is nothing to process, so print a usage hint
# and stop. The exit status stays 0, as in the original script.
if len(sys.argv) < 2:
    print("usage: {} <input.xlsx>".format(sys.argv[0]), file=sys.stderr)
    sys.exit(0)

# Result accumulator: starts as an empty DataFrame and is extended once per
# successfully processed row.
df = pd.DataFrame()

# Read the spreadsheet named on the command line, then flatten it to a plain
# list of row lists (DataFrame -> ndarray -> list).
data = pd.read_excel(sys.argv[1])
train_data = np.array(data)
excel_list = train_data.tolist()

def _value_after(text, marker, skip):
    """Extract the text that follows ``marker`` up to the next ``&``.

    The slice starts ``skip`` characters after the position where ``marker``
    begins and ends at the first ``&`` found at or after the marker. If no
    ``&`` follows, the rest of ``text`` is returned.

    Returns ``None`` when ``marker`` does not occur in ``text``, so callers
    can skip the row instead of slicing from a bogus ``-1`` position.
    """
    pos = text.find(marker)
    if pos == -1:
        return None
    end = text.find('&', pos)
    if end == -1:
        end = len(text)
    return text[pos + skip:end]


# Browser-like User-Agent sent with the lookup requests. It is the same for
# every row, so it is built once outside the loop.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36 QIHU 360SE'}

# Process one spreadsheet row at a time. The first cell of each row is sent
# as the ``regNo`` query parameter. A row that fails at any stage is reported
# and skipped so that one bad row does not abort the batch or inherit files
# left over from a previous row.
for i in excel_list:
    print("正在查询", i[0])
    url = '地址'
    payload = {"regNo": i[0]}
    r = requests.get(url, params=payload, headers=headers, stream=True)
    print(r.url)

    # The id is sliced out of the response text after the 'pdf.action' marker.
    # NOTE(review): the offset 14 presumably skips the marker plus a short
    # "?xx=" style prefix - confirm against a real response.
    uid = _value_after(r.text, 'pdf.action', 14)
    if uid is None:
        print('错误!查不到工商信息')
        continue

    # Fetch the PDF and store it locally.
    req = requests.get('地址'.format(uid))
    print('正在下载PDF', req.url)
    with open('1.pdf', 'wb') as file:
        file.write(req.content)

    # Pause for 1 second before continuing.
    time.sleep(1)
    print('等待1S下载')

    # Convert the PDF to PNG. Every page is written to the same file name,
    # so the last page is the one that remains on disk.
    try:
        doc = fitz.open('1.pdf')
    except Exception:
        # The download could not be opened as a PDF. Skip the row rather than
        # going on with a stale 1.png from an earlier iteration.
        print('错误!查不到工商信息')
        continue
    rendered = False
    try:
        for page in doc:
            page.get_pixmap().save('1.png')
            rendered = True
    finally:
        doc.close()
    if not rendered:
        print('错误!查不到工商信息')
        continue

    # Crop the QR code out of the page image and enlarge it 5x.
    # NOTE(review): the crop box is hard-coded pixel coordinates; it assumes
    # a fixed page layout at the default pixmap resolution - confirm.
    with Image.open("1.png") as img:
        qr = img.convert("RGB").crop((910, 260, 1000, 340))
    qr = qr.resize(tuple(x * 5 for x in qr.size))
    qr.save("1-1.png", quality=100)

    # Decode the QR code. If the plain grayscale image yields nothing, retry
    # with binary thresholds 35, 45, ... (below 200) until something decodes.
    frame = cv2.imread("1-1.png")
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    barcodes = pyzbar.decode(gray)
    thre = 35
    while not barcodes and thre < 200:
        _, thresh = cv2.threshold(gray, thre, 255, cv2.THRESH_BINARY)
        barcodes = pyzbar.decode(thresh)
        thre += 10

    print(barcodes)
    if not barcodes:
        # Nothing decoded at any threshold; barcodes[0] would raise.
        print('错误!识别不到二维码')
        continue

    # Extract the id from the decoded QR text and query the detail address.
    # NOTE(review): "id" is matched at its first occurrence anywhere in the
    # decoded text - confirm it cannot appear earlier (e.g. in the host name).
    qr_text = barcodes[0].data.decode()
    uid = _value_after(qr_text, "id", 3)
    if uid is None:
        print('错误!查不到手机号码')
        continue
    res = requests.get('地址'.format(uid), headers=headers)
    print('正在识别二维码地址', res.url)
    try:
        data = res.json()
    except ValueError:
        # The response body is not valid JSON.
        print('错误!查不到手机号码')
    else:
        # Append the record for this row to the accumulated results.
        df1 = pd.DataFrame(data, index=[0])
        print('【完成查询】', i[0])
        df = pd.concat([df, df1])

# Write the accumulated results to result.xlsx (index column omitted) and
# report success.
df.to_excel(excel_writer='result.xlsx', index=False)
print('数据保存成功!')

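# Residue from the web page this script was copied from (not part of the
# program):
#   标签: none      (Tags: none)
#   添加新评论      (Add a new comment)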