Python小试牛刀——新浪微博自动转发

前言

为了更加熟练地使用Python,这里通过新浪微博自动转发的例子来加深印象

环境

  • python3.6

登录实现

登录请求链

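通过火狐追踪新浪微博完整登录请求,可以获得2个关键URL:预登录地址preLoginUrl(https://login.sina.com.cn/sso/prelogin.php)和实际登录地址loginUrl(https://login.sina.com.cn/sso/login.php)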

可以通过 http://login.sina.com.cn/js/sso/ssologin.js 解压查看加密方式

通过preLoginUrl得到几个关键参数

servertime,nonce,pubkey,rsakv

用户名通过base64编码(base64是编码方式,并非加密),编码结果也就是loginUrl中formData的su参数

# 得到su值base64加密的账号
def getSu(self):
    """Return the account name Base64-encoded, for use as the ``su`` login field.

    Reads ``self.userName``, encodes it as UTF-8 and Base64-encodes the
    resulting bytes.

    Returns:
        bytes: the Base64 representation of the user name.
    """
    return base64.b64encode(self.userName.encode(encoding="utf-8"))

密码加密方式比较复杂,通过ssologin.js中查看,也就是loginUrl中formData的sp参数

# 从加密文件中得到加密密码sp
def getSp(self,servertime,nonce,pubkey,rsakv):
    """Return the RSA-encrypted password, for use as the ``sp`` login field.

    Args:
        servertime: ``servertime`` value from the pre-login response.
        nonce: ``nonce`` value from the pre-login response.
        pubkey: RSA modulus as a hexadecimal string.
        rsakv: accepted so the signature matches the pre-login values;
            it is not used by the encryption itself.

    Returns:
        bytes: the ciphertext rendered as lowercase hex, two digits per byte.
    """
    modulus = int(pubkey, 16)
    # 65537 is the fixed exponent found in the JS encryption file
    # (written there as hexadecimal 10001).
    key = rsa.PublicKey(modulus, 65537)
    # Plaintext layout taken from the JS encryption file:
    # servertime, TAB, nonce, LF, password.
    plaintext = str(servertime) + '\t' + str(nonce) + '\n' + str(self.password)
    # RSA-encrypt the UTF-8 bytes of the plaintext.
    encrypted = rsa.encrypt(plaintext.encode('utf-8'), key)
    # Convert every ciphertext byte to its two-digit hexadecimal form.
    return binascii.b2a_hex(encrypted)

获取登录信息

这里使用requests库发送请求,当然也可以用urllib3

        import requests # pip install requests

        # 得到登录信息
# Suppress urllib3 warnings; the requests in the function below are sent with verify=False.
requests.packages.urllib3.disable_warnings() 
def getInfo(self,su):
    """Call the pre-login endpoint and return the values needed for login.

    The endpoint answers with ``sinaSSOController.preloginCallBack({...})``;
    the JSON object inside the parentheses is extracted and parsed.
    Connection errors are retried every second without limit.

    Args:
        su: Base64-encoded user name, as ``bytes`` or ``str``.

    Returns:
        tuple: ``(servertime, nonce, pubkey, rsakv)`` on success.
        None: when the endpoint answers with a status other than 200.
    """
    from urllib.parse import quote

    print(u'正在校验用户信息,请稍后...')
    # "%s" % bytes would embed the literal b'...' in the URL on Python 3, so
    # normalise to text first. quote() percent-encodes the '+' and '='
    # characters that Base64 output may contain.
    su_text = su.decode('ascii') if isinstance(su, bytes) else str(su)
    su_param = quote(su_text)
    while True:
        # The "_" query parameter is a millisecond timestamp, so the URL is
        # rebuilt for every attempt.
        preLoginUrl = 'https://login.sina.com.cn/sso/prelogin.php?entry=weibo&callback=sinaSSOController.preloginCallBack&su=%s&rsakt=mod&checkpin=1&client=ssologin.js(v1.4.19)&_=%s' % (su_param, int(round(time.time() * 1000)))
        try:
            r = requests.get(preLoginUrl, verify=False)
        except requests.exceptions.ConnectionError:
            print("校验失败,1s后重试...")
            time.sleep(1)
            continue
        if r.status_code != 200:
            print(u'preLoginUrl调用出错,错误响应码:%s' % r.status_code)
            return None
        # Extract the {jsonBody} part of preloginCallBack({jsonBody}); it
        # carries the values used for password encryption.
        jsonBody = re.findall(r'\((\{.*?\})\)', r.text)[0]
        data = json.loads(jsonBody)
        return data['servertime'], data['nonce'], data['pubkey'], data['rsakv']

登录接口

       # Suppress urllib3 warnings; the requests in the function below are sent with verify=False.
       requests.packages.urllib3.disable_warnings() 
def getCookies(self,su, sp, servertime, nonce, rsakv):
    """Submit the login form and return the cookies of the login response.

    Flow: POST the form to the login URL, read the ``location.replace("...")``
    target from the returned page, GET that target, and parse the JSON object
    in its body to learn whether the login succeeded. Connection errors are
    retried every second without limit.

    Args:
        su: Base64-encoded user name.
        sp: encrypted password.
        servertime: ``servertime`` value from the pre-login response.
        nonce: ``nonce`` value from the pre-login response.
        rsakv: ``rsakv`` value from the pre-login response.

    Returns:
        dict: cookies set on the login POST response when ``retcode`` is 0.
        None: on any login failure, including a response that does not
        contain the expected redirect or JSON payload.
    """
    print(u'用户正在登录,请稍后...')
    # Actual login address.
    loginUrl = 'https://login.sina.com.cn/sso/login.php?client=ssologin.js(v1.4.19)'
    # Form data; identical for every retry, so it is built once.
    formData = {
        'entry':'weibo',
        'gateway':'1',
        'from':'',
        'qrcode_flag':'false',
        'useticket':'1',
        'pagerefer':'https://login.sina.com.cn/crossdomain2.php?action=logout&r=https%3A%2F%2Fweibo.com%2Flogout.php%3Fbackurl%3D%252F',
        'vsnf':'1',
        'su':su,
        'service':'miniblog',
        'servertime':servertime,
        'nonce':nonce,
        'pwencode':'rsa2',
        'rsakv':rsakv,
        'sp':sp,
        'sr':'1366*768',
        'encoding':'UTF-8',
        'prelt':'4959',
        'url':'https://weibo.com/ajaxlogin.php?framelogin=1&callback=parent.sinaSSOController.feedBackUrlCallBack',
        'returntype':'META'
    }
    # Request headers.
    headers = {
        'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36',
        'Accept':'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Encoding':'gzip, deflate, br',
        'Accept-Language':'zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2',
        'Content-Type':'application/x-www-form-urlencoded',
        'Host':'login.sina.com.cn',
        'Referer':'https://weibo.com/',
        'Upgrade-Insecure-Requests':'1'
    }
    while True:
        try:
            # A fresh Session per attempt, carrying its own copy of the headers.
            session = requests.Session()
            session.headers = dict(headers)
            response = session.post(loginUrl, data=formData, verify=False)
            result = response.content.decode('gbk')
            # The login page redirects via location.replace("<url>").
            info = re.findall(r"location\.replace\(\"(.*?)\"", result)
            if not info:
                # Without a redirect target there is nothing to follow; report
                # failure instead of raising IndexError.
                print(u'登录失败,原因:%s' % u'未找到登录跳转地址')
                return None
            # The redirect target answers with a body such as
            # ({"retcode":0,"arrURL":[...]}).
            res = requests.get(info[0], verify=False)
            payloads = re.findall(r'\((\{.*?\})\)', res.text)
            if not payloads:
                print(u'登录失败,原因:%s' % u'未找到登录结果')
                return None
            # The cookies come from the login POST response, not from the
            # follow-up GET.
            cookies = response.cookies.get_dict()
            data = json.loads(payloads[0])
            if data['retcode'] == 0:
                print(u'用户%s登录成功' % self.userName)
                return cookies
            elif data['retcode'] == 101:
                print(data['reason'])
                return None
            else:
                print(u'登录失败,原因:%s' % data['reason'])
                return None
        except requests.exceptions.ConnectionError:
            print(u"登录尝试失败,1s后重试连接...")
            time.sleep(1)
            continue

登录成功后,保存cookie,进行转发

 # content 发送内容 mid 微博ID pdetail location 博主定位
# Suppress urllib3 warnings; the requests in the function below are sent with verify=False.
requests.packages.urllib3.disable_warnings() 
def forward(self,contents,mid,domain,location,is_comment_base):
    """Log in, then repost a weibo over and over with randomly chosen text.

    The loop sends one request roughly every two seconds. It stops when the
    server answers with code ``'100027'``; code ``'100001'`` with the
    "posting too often" message pauses for a minute. Connection errors are
    retried after one second. If login fails, no repost is attempted.

    Args:
        contents: pool of candidate texts; one is drawn per request through
            ``CommonUtil.randomContent``.
        mid: ID of the weibo to repost.
        domain: value placed in the ``domain`` query parameter and in the
            ``location`` form field (``page_<domain>_home``).
        location: value sent as the ``pdetail`` form field.
        is_comment_base: pass ``'y'`` to also send ``is_comment_base=1``
            (comment on the original author's post).
    """
    # NOTE: requestCount is tracked for an optional request cap; the earlier
    # draft compared it against a `setCount` limit that is not defined here.
    requestCount = 0  # total number of requests sent
    successCount = 0  # number of successful reposts
    # Obtain the user's cookies through the login steps. These are methods of
    # the same object, so they are called on self rather than on a global.
    su = self.getSu()
    preLogin = self.getInfo(su)
    if preLogin is None:
        # Pre-login failed (already reported); nothing can be reposted.
        print(u'总共转发%s次' %successCount)
        return
    servertime,nonce,pubkey,rsakv = preLogin
    sp = self.getSp(servertime,nonce,pubkey,rsakv)
    cookies = self.getCookies(su, sp,servertime,nonce,rsakv)
    if cookies is None:
        # Login failed (already reported); posting without cookies is pointless.
        print(u'总共转发%s次' %successCount)
        return
    # Request headers; identical for every request, so they are built once.
    headers = {
        'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36',
        'Accept':'*/*',
        'Accept-Encoding':'gzip, deflate, br',
        'Accept-Language':'zh-CN,zh;q=0.9',
        'Content-Type':'application/x-www-form-urlencoded',
        'Origin':'https://weibo.com',
        'Host':'weibo.com',
        'Referer':'https://weibo.com/u/5783494388?is_all=1',
        'Upgrade-Insecure-Requests':'1',
        'X-Requested-With':'XMLHttpRequest',
    }
    print(u'正在请求转发,请稍后...')
    while True:
        try:
            content = CommonUtil.randomContent(contents)
            forwardUrl = 'https://weibo.com/aj/v6/mblog/forward?ajwvr=6&domain=%s&__rnd=%s' % (domain,int(round(time.time() * 1000)))

            formData = {
                'pic_src':'',
                'pic_id':'',
                'appkey':'',
                'mid':mid,
                'styletype':'1',
                'mark':'',
                'reason':content,
                'location':'page_%s_home' % domain,
                # The `location` argument supplies the pdetail form value.
                'pdetail':location,
                'module':'',
                'page_module_id':'',
                'refer_sort':'',
                'rank':'0',
                'rankid':'',
                'isReEdit':'false',
                '_t':'0',
            }
            if is_comment_base == 'y':
                # Also comment on the original author's post.
                formData['is_comment_base'] = '1'

            session = requests.Session()
            session.headers = dict(headers)
            time.sleep(2)  # send one request every 2 seconds
            response = session.post(forwardUrl, data=formData,cookies=cookies, verify=False)
            result = response.content.decode('gbk')
            data = json.loads(result)
            requestCount = requestCount + 1
            if data['code'] == '100000':
                successCount = successCount + 1
                print(u'转发内容:%s 成功次数:%s' % (content,successCount))
            elif data['code'] == '100027':
                print(u'转发失败,进入小黑屋,停止转发,请更换账号,失败原因:%s  %s' % (data['code'],data['msg']))
                print(u'%s目前转发成功次数:%s' %(self.userName,successCount))
                break
            elif data['code'] == '100001' and '微博发的太多啦,休息一会再发啦' in data['msg']:
                print(u'微博发送太频繁,休息1分钟重试,失败原因%s(%s)' %(data['code'],data['msg']))
                time.sleep(60)
                continue
            else:
                print(u'转发失败,失败原因:%s  %s' %(data['code'],data['msg']))
                print(u'%s目前转发成功次数:%s' %(self.userName,successCount))
        except requests.exceptions.ConnectionError:
            print("转发尝试失败,1s后重试连接...")
            time.sleep(1)
            continue
    print(u'总共转发%s次' %successCount)

基础工具类

class CommonUtil:
    """Static utility helpers: random text selection and byte decoding."""

    @staticmethod
    def randomContent(contents):
        """Return one randomly drawn element of ``contents``, converted to a string."""
        (picked,) = random.sample(contents, 1)
        return str(picked)

    @staticmethod
    def decodeUTF8(str):
        """Return ``str`` decoded as UTF-8 by calling its ``decode`` method."""
        decoded = str.decode("UTF-8")
        return decoded

    @staticmethod
    def decodeGBK(str):
        """Return ``str`` decoded as GBK by calling its ``decode`` method."""
        decoded = str.decode("GBK")
        return decoded