from bs4 import BeautifulSoup
import requests,re,os
# HTTP request headers passed as `headers=` to the requests.get() calls in
# Down_Image (both the page fetch and each image fetch).
# NOTE(review): presumably the browser-like User-Agent and the site-root
# referer are there so the site serves pages/images to the script — confirm.
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36',
'referer' : 'https://www.vmgirls.com/'
}
def loadDatas(datas):
    """Download the images of every page listed in *datas*.

    Each entry is a page path that is appended to the site root
    ``https://www.vmgirls.com/`` to form the page URL, which is printed and
    then handed to ``Down_Image``.

    Args:
        datas: Iterable of page-path strings (e.g. the lines of a text file).
            Surrounding whitespace is ignored and blank entries are skipped.
    """
    for data in datas:
        # Splitting file content on '\n' leaves an empty trailing entry (and
        # possibly stray '\r' / spaces). An empty path would resolve to the
        # bare site root and be scraped as if it were a gallery page.
        page = data.strip()
        if not page:
            continue
        url = "https://www.vmgirls.com/" + page
        print(url)
        print('-----------------------------------------------')
        Down_Image(url)
        print('-----------------------------------------------')
def Down_Image(url):
    """Download every jpg/jpeg/png image referenced by ``<img>`` tags on a page.

    The page at *url* is fetched and parsed; each ``<img>`` whose ``src`` ends
    in ``.jpg``, ``.jpeg`` or ``.png`` is downloaded into a directory named
    after the text of the element with class ``post-title h1``. When that
    element (or its text) is missing, the last path segment of *url* is used
    as the directory name instead. Nothing is created when the page has no
    matching images.

    Args:
        url: Absolute URL of the page to scrape.

    Raises:
        requests.RequestException: If a page or image request fails or
            times out.
        OSError: If the directory or an image file cannot be written.
    """
    # Function-scope import so the block brings its own dependency into scope.
    from urllib.parse import urljoin

    # A timeout keeps one stalled connection from hanging the whole run.
    response = requests.get(url, headers=headers, timeout=30).text
    soup = BeautifulSoup(response, 'html.parser')

    # <img> tags without a src attribute (get() returns None) are skipped
    # instead of crashing on None.split().
    image_sources = []
    for tag in soup.find_all('img'):
        src = tag.get('src')
        if src and src.split('.')[-1] in ('jpg', 'jpeg', 'png'):
            image_sources.append(str(src))
    if not image_sources:
        return

    # The target directory is the same for every image on the page, so it is
    # resolved and created once rather than once per image.
    title_tag = soup.find(class_='post-title h1')
    dir_name = title_tag.string if title_tag is not None else None
    if not dir_name:
        # No usable title on the page: fall back to the page's own name.
        dir_name = url.rstrip('/').split('/')[-1]
    os.makedirs(dir_name, exist_ok=True)

    for src in image_sources:
        # urljoin leaves absolute URLs untouched and resolves
        # protocol-relative ("//host/x.jpg"), root-relative ("/x.jpg") and
        # relative sources against the page URL.
        image = requests.get(urljoin(url, src), headers=headers, timeout=30).content
        file_name = src.split('/')[-1]
        target = dir_name + '/' + file_name
        with open(target, 'wb') as f:
            print('正在写入----->' + target)
            f.write(image)
if __name__ == '__main__':
    # Credits banner, printed line by line before anything else happens.
    banner = (
        ' ---------------------------------------------------------------------',
        '| |',
        '| Author:culprit --- 52pojie |',
        '| Modified by panpanpan(1277936431) --- 52pojie |',
        '| |',
        ' ---------------------------------------------------------------------',
    )
    for banner_line in banner:
        print(banner_line)

    # datas.txt holds one page path per line; the file is read in full and
    # split on newlines.
    with open(r'datas.txt') as f:
        datas = f.read().split('\n')

    # Wait for the user to press Enter, then process every listed page.
    input('点击开始!')
    loadDatas(datas)