from lxml import etree
import requests,random
class ip_proxy:
    """Scrape free HTTP proxies from ip3366.net and hand out a working one.

    All methods are classmethods; the class is used as a namespace and the
    validated proxy URLs are kept in the shared class attribute ``arr``.
    """

    # Proxy URLs ("http://ip:port") that passed validation in the latest scrape.
    arr = []
    # Browser-like request headers sent with every request.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36 Edg/123.0.0.0'
    }
    # Seconds to wait on any single HTTP request; without a timeout a dead
    # proxy can block the caller indefinitely.
    timeout = 5

    @classmethod
    def check_valid(cls, ip_port):
        """Check whether a proxy is usable.

        Args:
            ip_port: Proxy URL in the form ``http://ip:port``.

        Returns:
            True if a GET to the test page routed through the proxy returns
            HTTP 200; False on any other status or on any request failure
            (timeout, refused connection, proxy error, ...).
        """
        try:
            response = requests.get(
                url='https://www.baidu.com/',
                headers=cls.headers,
                # The test URL is https, so the proxy must be mapped for that
                # scheme as well; with only 'http' mapped the request would
                # bypass the proxy and the check would always pass.
                proxies={'http': ip_port, 'https': ip_port},
                timeout=cls.timeout,
            )
        except requests.RequestException:
            return False
        return response.status_code == 200

    @classmethod
    def get_ip_list(cls):
        """Scrape pages 1-5 of the proxy listing and keep the proxies that work.

        ``arr`` is rebuilt in place on every call, so repeated calls do not
        accumulate duplicates or stale entries.

        Returns:
            The class-level list ``arr`` containing the validated proxy URLs
            (possibly empty).
        """
        found = []
        # Paginated scrape.
        for page in range(1, 6):
            try:
                response = requests.get(
                    url=f'http://www.ip3366.net/?stype=1&page={page}',
                    headers=cls.headers,
                    timeout=cls.timeout,
                )
            except requests.RequestException:
                # One unreachable page should not abort the whole scrape.
                continue
            if not response.text.strip():
                continue
            html_tree = etree.HTML(response.text)
            if html_tree is None:
                continue
            # Extract ip and port from the first two cells of each table row.
            for tr in html_tree.xpath('//div[@id="list"]/table/tbody/tr'):
                td_tags = tr.xpath('./td')
                if len(td_tags) < 2:
                    continue
                ip_text = td_tags[0].text
                port_text = td_tags[1].text
                if not ip_text or not port_text:
                    continue
                ip_port = 'http://' + ip_text.strip() + ':' + port_text.strip()
                if ip_port in found:
                    continue
                if cls.check_valid(ip_port):
                    found.append(ip_port)
        # Replace the contents in place so existing references to arr stay valid.
        cls.arr[:] = found
        return cls.arr

    @classmethod
    def get_proxy(cls):
        """Return one working proxy in the format expected by ``requests``.

        The scraped proxies are tried in random order and each is re-validated
        before being returned.

        Returns:
            ``{'http': 'http://ip:port'}`` for the first proxy that validates,
            or an empty dict when no working proxy is available.
        """
        candidates = list(cls.get_ip_list())
        # Shuffling and walking the list once keeps the random choice but
        # guarantees termination when the list is empty or every proxy is dead.
        random.shuffle(candidates)
        for ip_address in candidates:
            if cls.check_valid(ip_address):
                return {'http': ip_address}
        return {}
if __name__ == '__main__':
    # Script entry point: fetch one validated proxy mapping and print it.
    print(ip_proxy.get_proxy())
# Related content