竞赛圈 > 求大神:R模拟新浪微博登录出现302问题
我用R语言模拟新浪微博的http请求出现问题,下面贴一部分代码:
setwd("C:/Users/szy/Desktop")
getwd()
library(RCurl)
library(rjson)
library(stringr)
library(XML)
library(PKI)
t1 <- floor(as.numeric(Sys.time())*1000)
u <- "******@****"
u <- gsub('@','%40',u)
su <- base64(u)[1]
preReqHeader <- c(
"Host"="login.sina.com.cn",
"User-Agent"="iaskspider/2.0(+http://iask.com/help/help_index.html)",
"Accept"="*/*",
"Accept-Language"="zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3",
# "Accept-Encoding"="gzip, deflate, br",
"Referer"="http://weibo.com/",
"Connection"="keep-alive",
)
plt <- t1
prelogin_url <- paste("https://login.sina.com.cn/sso/prelogin.php?entry=weibo&callback=sinaSSOController.preloginCallBack&su=&rsakt=mod&client=ssologin.js(v1.4.18)&_=",plt,sep="")
text1 <- getURL(prelogin_url,httpheader=preReqHeader)
tmp <- fromJSON(gsub("sinaSSOController.preloginCallBack\\(\\{(.*)\\}\\)","\\{\\1\\}",text1))
t2 <- floor(as.numeric(Sys.time())*1000)
prelt <- t2-plt-tmp$exectime
pw <- "******"
key_p <- tmp$pubkey
key_p_p <- ""
for(i in 1:(str_length(key_p) / 2)){
key_p_p[i] <- unclass(as.hexmode(tolower(str_sub(key_p,(2*i-1),(2*i)))))
}
pubkey_p <- PKI.mkRSApubkey(as.raw(key_p_p),exponent=65537L,format="key")
keyword <- paste(tmp$servertime,"\t",tmp$nonce,"\n",pw,sep="")
passwd <- PKI.encrypt(charToRaw(keyword),pubkey_p)
passwd <- paste(as.character(passwd),sep="",collapse="")
h <- basicHeaderGatherer()
loginData <- c(
"entry"="weibo",
"gateway"="1",
"from"="",
"savestate"="7",
"useticket"="1",
"pagerefer"="http://login.sina.com.cn/sso/logout.php?entry=miniblog&r=http%3A%2F%2Fweibo.com%2Flogout.php%3Fbackurl%3D%252F",
"vsnf"="1",
"su"=su,
"service"="miniblog",
"servertime"=tmp$servertime,
"nonce"=tmp$nonce,
"pwencode"="rsa2",
"rsakv"=tmp$rsakv,
"sp"=passwd,
"sr"="1366*768",
"encoding"="UTF-8",
"prelt"=prelt,
"url"="http://weibo.com/ajaxlogin.php?framelogin=1&callback=parent.sinaSSOController.feedBackUrlCallBack",
"returntype"="META"
)
loginReqHeader <- c(
"Host"="login.sina.com.cn",
"User-Agent"="iaskspider/2.0(+http://iask.com/help/help_index.html)",
"Accept"="Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
"Accept-Language"="zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3",
# "Accept-Encoding"="gzip, deflate",
"Referer"="http://weibo.com/",
# 改进:Content-Length的计算
"Connection"="keep-alive",
"Content-Type"="application/x-www-form-urlencoded",
"Content-Length"=sum(nchar(loginData)),
"Upgrade-Insecure-Requests"="1",
"Cookie"="******"
)
login_url <- "http://login.sina.com.cn/sso/login.php?client=ssologin.js(v1.4.18)"
text2 <- postForm(login_url,.params=loginData,style="post",httpheader=loginReqHeader)
newurl <- strsplit(text2,"location.replace\\(\\'")[[1]][2]
newurl <- strsplit(newurl,"\\'\\)")[[1]][1]
到这里还是正常的,得出的newurl如下:
http://passport.weibo.com/wbsso/login?ssosavestate=1509927443&url=http%3A%2F%2Fweibo.com%2Fajaxlogin.php%3Fframelogin%3D1%26callback%3Dparent.sinaSSOController.feedBackUrlCallBack&ticket=ST-MTc5MTkxODY3Mw==-1478391443-gz-D3A503269FE8B46FC848599ABE801FCF&retcode=0
然后就出现问题了,请求newurl的结果是空,并且h$value()出现302问题:
text4 <- getURL(newurl,headerfunction=h$update)
h$value()
说明是跳转页面,那接下来应该怎么做呢,求大神,我在这里卡了好久。。。