python-demo/downloadImage/image_spilder.py

57 lines
1.5 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import hashlib
import os
from urllib.parse import unquote, urljoin, urlparse

import requests
from bs4 import BeautifulSoup
def create_directory(directory):
    """Create the directory used to store downloaded images.

    Missing parent directories are created as well. Calling this for a
    directory that already exists is a no-op.

    Args:
        directory: Path of the directory to create.

    Raises:
        OSError: If the directory cannot be created, for example because the
            path already exists as a regular file or permissions are
            insufficient.
    """
    # exist_ok=True avoids the race between a separate existence check and
    # the creation (another process may create the directory in between).
    os.makedirs(directory, exist_ok=True)
def _image_filename(url):
    """Return a local file name derived from the path component of ``url``."""
    # Only the URL path is used, so query strings and fragments
    # ("?w=200", "#top") never end up in the file name. basename() is applied
    # after unquoting so an encoded "/" cannot escape the target folder.
    name = os.path.basename(unquote(urlparse(url).path))
    if name in ("", ".", ".."):
        # URLs such as "https://host/img/" have no usable last segment; fall
        # back to a name that is stable for the same URL.
        digest = hashlib.sha256(url.encode("utf-8")).hexdigest()[:12]
        name = f"image_{digest}"
    return name


def download_image(url, folder_path, timeout=10):
    """Download the image at ``url`` and save it inside ``folder_path``.

    The download is best-effort: network failures, file-system errors and
    malformed URLs are reported on stdout instead of being raised, so one bad
    image does not abort a batch of downloads.

    Args:
        url: Absolute URL of the image.
        folder_path: Directory in which the file is written.
        timeout: Seconds to wait for the server before giving up; passed to
            ``requests.get``.

    Returns:
        None.
    """
    try:
        # Using the response as a context manager releases the streamed
        # connection on every path, including non-200 responses.
        with requests.get(url, stream=True, timeout=timeout) as response:
            if response.status_code != 200:
                print(f"图片下载失败: {url}")
                return
            filename = os.path.join(folder_path, _image_filename(url))
            with open(filename, 'wb') as file:
                for chunk in response.iter_content(8192):
                    file.write(chunk)
        print(f"图片下载成功: {filename}")
    except (requests.RequestException, OSError, ValueError) as e:
        # RequestException: network/HTTP-level failures; OSError: file-system
        # failures; ValueError: malformed URLs or unusable file names.
        print(f"下载过程中出现错误: {e}")
def scrape_images(url, folder_path="downloaded_images", timeout=10):
    """Download every image referenced by an ``<img>`` tag on a web page.

    The page is fetched and parsed with BeautifulSoup, and each distinct
    ``src`` value is resolved to an absolute URL and handed to
    ``download_image``. The HTTP status of the page itself is not checked:
    whatever body the server returns is parsed.

    Args:
        url: URL of the page to scrape.
        folder_path: Directory in which images are saved; created if missing.
        timeout: Seconds to wait for the page request before giving up.

    Raises:
        requests.RequestException: If the page itself cannot be fetched.
    """
    create_directory(folder_path)
    # Fetch the page; the timeout keeps a stalled server from hanging the run.
    response = requests.get(url, timeout=timeout)
    soup = BeautifulSoup(response.text, 'html.parser')
    # Resolve relative paths against the final URL so that redirects are
    # honoured (response.url is the location after any redirects).
    base_url = response.url
    # Dict keys keep first-seen order while dropping repeated images, so the
    # same file is not downloaded several times.
    img_urls = {}
    for img in soup.find_all('img'):
        src = (img.get('src') or "").strip()
        if not src:
            continue
        img_urls[urljoin(base_url, src)] = None
    for img_url in img_urls:
        download_image(img_url, folder_path)
# Example usage.
# NOTE(review): no `if __name__ == "__main__":` guard is visible here, so these
# statements appear to run on import as well as when the file is executed as a
# script -- confirm and consider adding a guard.
url = "https://lcdd.net" # URL of the web page to scrape
scrape_images(url)