Python小试牛刀——获取代理IP

因为爬取某些网站时会遇到IP访问限制,所以写了一个用于获取代理IP的工具类。

# -*- coding: utf-8 -*-

from bs4 import BeautifulSoup # pip install bs4 安装lxml解析器 import pip print(pip.pep425tags.get_supported()) 检测版本https://www.lfd.uci.edu/~gohlke/pythonlibs/#lxml
import requests
import random

class Proxy:
    """Scrape a proxy listing page and pick a proxy at random."""

    def getIps(self, url, headers, timeout=10):
        """Fetch ``url`` and return the proxies found in its HTML table rows.

        Args:
            url: Address of the proxy listing page.
            headers: HTTP headers sent with the request.
            timeout: Seconds to wait for the server before giving up, so an
                unresponsive host cannot block the caller indefinitely.

        Returns:
            A list of ``'ip:port'`` strings, one per usable table row.
        """
        response = requests.get(url, headers=headers, timeout=timeout)
        soup = BeautifulSoup(response.text, 'lxml')
        rows = soup.find_all('tr')
        ipList = []
        # The first <tr> is skipped (presumably the table header row).
        for row in rows[1:]:
            tds = row.find_all('td')
            # Rows without at least three <td> cells cannot supply the two
            # cells read below; skip them rather than fail on the index.
            if len(tds) < 3:
                continue
            # Cells 1 and 2 are joined as "ip:port"; surrounding whitespace
            # from the markup would make the resulting proxy URL invalid.
            ipList.append(tds[1].text.strip() + ':' + tds[2].text.strip())
        return ipList

    def getRandomIp(self, ipList):
        """Pick one entry of ``ipList`` and return it as a proxies mapping.

        Args:
            ipList: Iterable of ``'ip:port'`` strings.

        Returns:
            A dict with keys ``'http'`` and ``'https'``, both mapped to
            ``'http://<ip:port>'`` for the chosen entry.

        Raises:
            IndexError: If ``ipList`` is empty.
        """
        # Choose first, then add the scheme once, instead of prefixing
        # every candidate. list() keeps arbitrary iterables working.
        proxyIp = 'http://' + random.choice(list(ipList))
        return {'http': proxyIp, 'https': proxyIp}

if __name__ == '__main__':
    # Demo: scrape the proxy listing page and print one randomly chosen
    # proxy in the mapping form returned by Proxy.getRandomIp.
    proxy = Proxy()

    url = 'http://www.xicidaili.com/nn/'
    # Browser-like User-Agent (presumably the site rejects the default
    # client identification -- verify if the request is refused).
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.143 Safari/537.36'
    }
    ipList = proxy.getIps(url, headers=headers)
    # An empty result would otherwise surface as an opaque IndexError from
    # random.choice; exit with an explicit message instead.
    if not ipList:
        raise SystemExit('No proxies found at ' + url)
    proxyUrl = proxy.getRandomIp(ipList)
    print(proxyUrl)