#!/usr/bin/env python3
# coding=utf-8
import scrapy,os
import re,time,json
from lxml import etree
# import yt_common
import hashlib
class WechatSpider(scrapy.Spider):
    """Log in to the WeChat official-account platform and list account articles.

    Request chain: ``start_requests`` posts the login form, ``wechat_login``
    reads the redirect URL and loads the platform home page, ``get_cookie``
    extracts the session token from that page, ``get_fakeid`` turns a search
    response into an article-list request, and ``parse`` receives the
    article-list response.

    The account can be overridden from the command line, e.g.
    ``scrapy crawl wechat_official_account -a username=... -a password=...``.
    """

    name = "wechat_official_account"
    # Was "wexin.qq.com" (typo), which never matches mp.weixin.qq.com.
    allowed_domains = ["weixin.qq.com"]
    start_urls = ["https://mp.weixin.qq.com/"]

    BASE_URL = "https://mp.weixin.qq.com"
    USER_AGENT = (
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/67.0.3396.62 Safari/537.36"
    )

    # SECURITY: credentials do not belong in source control. These defaults
    # are kept only for backward compatibility; prefer passing
    # ``-a username=... -a password=...`` and rotate this password.
    username = "leehome1989@sina.com"
    password = "sw892130"

    def __init__(self, *args, **kwargs):
        """Initialise the spider.

        Positional and keyword arguments are forwarded to ``scrapy.Spider``,
        which copies keyword arguments (spider ``-a`` options) onto the
        instance, so ``username`` / ``password`` can be overridden.
        """
        super().__init__(*args, **kwargs)
        self.official_account = os.path.join(os.getcwd(), "official_account.text")
        # Default headers for requests that need no special Referer.
        self.headers = {
            "Referer": self.BASE_URL + "/",
            "User-Agent": self.USER_AGENT,
        }

    def start_requests(self):
        """Yield the login form POST that starts the session."""
        url = self.BASE_URL + "/cgi-bin/bizlogin?action=startlogin"
        headers = {
            "Referer": self.BASE_URL + "/cgi-bin/loginpage?t=wxm2-login&lang=zh_CN",
            "User-Agent": self.USER_AGENT,
        }
        form_data = {
            "username": self.username,
            # The form carries the MD5 hex digest of the password. hashlib
            # requires bytes, so the text must be encoded first.
            "pwd": hashlib.md5(self.password.encode("utf-8")).hexdigest(),
            "f": "json",
            "ajax": "1",
            "lang": "zh_CN",
        }
        yield scrapy.FormRequest(
            url=url,
            formdata=form_data,
            headers=headers,
            callback=self.wechat_login,
        )

    def wechat_login(self, response):
        """Handle the login response and request the platform home page.

        The ``redirect_url`` found in the response body is used only as the
        Referer of the follow-up request. Nothing is yielded when the
        response contains no ``redirect_url``.
        """
        self.logger.debug("Login response: %s", response.text)
        match = re.search(r'redirect_url":"(.*?)"', response.text)
        if match is None:
            self.logger.error("Login response contains no redirect_url; aborting.")
            return
        headers = {
            "Referer": self.BASE_URL + match.group(1),
            "User-Agent": self.USER_AGENT,
            # Header names cannot contain spaces and values must be strings.
            "Upgrade-Insecure-Requests": "1",
        }
        yield scrapy.Request(
            url=self.BASE_URL + "/",
            callback=self.get_cookie,
            dont_filter=True,
            headers=headers,
        )

    def get_cookie(self, response):
        """Extract the session token from the platform home page.

        Logs the cookies sent with the request and the ``token=<digits>``
        value found in the page. Nothing is requested afterwards yet.
        """
        self.logger.debug("Home page response: %s", response.text)
        cookies = response.request.headers.getlist("Cookie")
        self.logger.debug("Cookies sent with the request: %s", cookies)
        match = re.search(r"token=(\d+)", response.text)
        if match is None:
            # NOTE(review): presumably the page only embeds a token once the
            # login has been confirmed by scanning the QR code - confirm.
            self.logger.error("No token found in the home page response.")
            return
        token = match.group(1)
        self.logger.info("Session token: %s", token)
        # TODO: issue the follow-up search request whose response is handled
        # by get_fakeid; its URL was never defined. get_fakeid reads the
        # token from response.meta["meta"]. Original (disabled) sketch:
        # headers={"Host": "mp.weixin.qq.com",
        #          "Referer": "https://mp.weixin.qq.com/cgi-bin/bizlogin?action=validate&lang=zh_CN&account=leehome1989%40sina.com",
        #          "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.62 Safari/537.3"}
        # yield scrapy.Request(url=url, callback=self.get_fakeid, dont_filter=True, headers=headers, cookies=cookies)

    def get_fakeid(self, response):
        """Build the article-list request from a response containing a fakeid.

        The token is read from ``response.meta["meta"]``. Nothing is yielded
        when the response contains no ``fakeid``.
        """
        token = response.meta["meta"]
        self.logger.debug("Search response: %s", response.text)
        # Non-greedy so that only the first fakeid is captured when several
        # results share one line; the trailing "==" is re-added URL-encoded.
        match = re.search(r'fakeid":"(.*?)==",', response.text)
        if match is None:
            self.logger.error("No fakeid found in the search response.")
            return
        fakeid = match.group(1)
        self.logger.info("fakeid: %s", fakeid)
        url = (
            self.BASE_URL
            + "/cgi-bin/appmsg?token={}&lang=zh_CN&f=json&ajax=1&action=list_ex"
            "&begin=0&count=5&query=&fakeid={}%3D%3D&type=9"
        ).format(token, fakeid)
        yield scrapy.Request(
            url=url,
            callback=self.parse,
            dont_filter=True,
            headers=self.headers,
        )

    def parse(self, response):
        """Receive the article-list response and log its body."""
        self.logger.info("Article list response: %s", response.text)

    # Backward-compatible alias for the original, misspelled method name.
    pasrse = parse
# The WeChat official-account interface requires a logged-in session, because its token is issued only after scanning the QR code, but I can't get that page when I request it.