package main
import (
"fmt"
"io/ioutil"
"net/http"
"net/http/cookiejar"
"net/url"
"io"
"log"
"regexp"
"strings"
)
var cookies_lagou []*http.Cookie
const (
login_url_lagou string = "https://passport.lagou.com/login/login.html"
post_login_info_url_lagou string = "https://passport.lagou.com/login/login.json"
username_lagou string = "xxxxxxxxxx"
password_lagou string = "4525674692ac06e619cdb3f1b4b65b08"
)
func getToken(contents io.Reader) (string,string){
data, _ := ioutil.ReadAll(contents)
regCode := regexp.MustCompile(`X_Anti_Forge_Code\s+\=(.+?);`)
if regCode == nil {
log.Fatal("Code...")
}
//
code := regCode.FindAllStringSubmatch(string(data), -1)[0][1]
regToken := regexp.MustCompile(`X_Anti_Forge_Token\s+\=(.+?);`)
if regToken == nil {
fmt.Println("MustCompile err")
}
//
token := regToken.FindAllStringSubmatch(string(data), -1)[0][1]
return token,code
}
func login_lagou() {
//cookie
jar, _ := cookiejar.New(nil)
client := &http.Client{
Jar: jar,
}
req, _ := http.NewRequest("GET", login_url_lagou, nil)
res, _ := client.Do(req)
for k, v := range res.Cookies() {
fmt.Printf("%v=%v\n",k,v)
}
token,code := getToken(res.Body)
//post
postValues := url.Values{}
postValues.Add("isValidate", "true")
postValues.Add("username", username_lagou)
postValues.Add("password", password_lagou)
postValues.Add("request_form_verifyCode", "")
postValues.Add("submit", "")
body := ioutil.NopCloser(strings.NewReader(postValues.Encode())) //form
requ, _ := http.NewRequest("POST", post_login_info_url_lagou, body)
requ.Header.Set("X-Requested-With","XMLHttpRequest")
requ.Header.Set("Content-Type","application/x-www-form-urlencoded; charset=UTF-8")
requ.Header.Set("User-Agent","Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36")
requ.Header.Set("Host","passport.lagou.com")
requ.Header.Set("Origin","https://passport.lagou.com")
requ.Header.Add("X-Anit-Forge-Token",token)
requ.Header.Add("X-Anit-Forge-Code",code)
requ.Header.Set("Accept","application/json, text/javascript, */*; q=0.01")
requ.Header.Set("Connection","keep-alive")
requ.Header.Set("Accept-Language","en-US,en;q=0.9,zh-CN;q=0.8,zh;q=0.7")
requ.Header.Set("Accept-Encoding","gzip, deflate, br")
//requ.Header.Set("Content-Length","111")
//requ.Header.Set("Cookie","JSESSIONID=ABAAABAAAHAAAFD632D7028F2AC7466F2DAA44C1BE15A01; user_trace_token=20180418184827-7aec3c11-930a-494a-9a73-c83419b1450a; X_HTTP_TOKEN=7ab0dd4e12bf94d2d4c0c3370e1c6341; _ga=GA1.2.837634606.1524048588; _gid=GA1.2.1349141044.1524048588; _ga=GA1.3.837634606.1524048588; Hm_lvt_4233e74dff0ae5bd0a3d81c6ccf756e6=1523518887,1524030389,1524033687,1524045972; Hm_lpvt_4233e74dff0ae5bd0a3d81c6ccf756e6=1524048588; LGSID=20180418184827-06c3d493-42f6-11e8-8a1c-525400f775ce; PRE_UTM=; PRE_HOST=; PRE_SITE=; PRE_LAND=http%3A%2F%2Fpassport.lagou.com%2Flogin%2Flogin.html; LGUID=20180418184827-06c3d687-42f6-11e8-8a1c-525400f775ce; LGRID=20180418185212-8ca212e6-42f6-11e8-b8a8-5254005c3644; TG-TRACK-CODE=undefined")
for _, v := range res.Cookies() {
requ.AddCookie(v)
}
res,_ = client.Do(requ)
//cookies_lagou = res.Cookies()
data, _ := ioutil.ReadAll(res.Body)
res.Body.Close()
fmt.Println(string(data))
}
func main() {
login_lagou()
}
the running result of the above code is:
0=JSESSIONID=ABAAABAAAHAAAFD5DA9395672E82BF6CEDCAE3CA350ED64; Path=/; HttpOnly
1=user_trace_token=20180418221336-b03f8889-f175-46cf-b986-af55a8ef3e33; Path=/; Domain=lagou.com; Max-Age=31536000
{"content":{"rows":[]},"message":"","state":299}
Process finished with exit code 0
I refer to this article python-- pull-hook crawler simulated login in python, there is a requests package, and the previous cookies,session information is automatically retained in requests.session, but not when implemented in golang. Cookies, is also set to add the cookies traversal of login.html, and then request login. In the actual login, cooike seems to have something more. I don"t know if it"s because of this. Browser login Header information:
Accept: application/json, text/javascript, */*; q=0.01
Accept-Encoding: gzip, deflate, br
Accept-Language: en-US,en;q=0.9,zh-CN;q=0.8,zh;q=0.7
Connection: keep-alive
Content-Length: 111
Content-Type: application/x-www-form-urlencoded; charset=UTF-8
Cookie: user_trace_token=20180418184827-7aec3c11-930a-494a-9a73-c83419b1450a; _ga=GA1.2.837634606.1524048588; _gid=GA1.2.1349141044.1524048588; _ga=GA1.3.837634606.1524048588; LGUID=20180418184827-06c3d687-42f6-11e8-8a1c-525400f775ce; index_location_city=%E6%88%90%E9%83%BD; gate_login_token=e27e32a29e46e476b94f06e8e0a5b6d149dbb2a5efd97c51; LGSID=20180418212135-6b42e55b-430b-11e8-b8a9-5254005c3644; PRE_UTM=; PRE_HOST=; PRE_SITE=; PRE_LAND=https%3A%2F%2Fwww.lagou.com%2F; Hm_lvt_4233e74dff0ae5bd0a3d81c6ccf756e6=1524030389,1524033687,1524045972,1524057776; JSESSIONID=ABAAABAAAHAAAFD5346FCED99BA9CA2991F52D85C9D0982; X_HTTP_TOKEN=7ab0dd4e12bf94d2d4c0c3370e1c6341; TG-TRACK-CODE=undefined; _gat=1; LGRID=20180418213214-e7ac4b8b-430c-11e8-8a7e-525400f775ce; Hm_lpvt_4233e74dff0ae5bd0a3d81c6ccf756e6=1524058415
Host: passport.lagou.com
Origin: https://passport.lagou.com
Referer: https://passport.lagou.com/login/login.html
User-Agent: Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.117 Safari/537.36
X-Anit-Forge-Code: 21248807
X-Anit-Forge-Token: cc96fefa-0bad-4a06-8674-48fc1b42e125
X-Requested-With: XMLHttpRequest
excuse me, seniors, is there something wrong with my code? I have Baidu, google for two days, all kinds of twists and turns, and even pull down the python code to run, but you can simulate landing, golang is not, there is really no way, so come to the forum to help your predecessors, ask seniors for advice, thank you very much.