# Simple image scraper: downloads every image referenced by <img> tags on a page.
import os
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

# Create the directory used to store downloaded images.
def create_directory(directory):
    """Create *directory* (and any missing parents) if it does not exist.

    Args:
        directory: Path of the directory to create.
    """
    # exist_ok=True is atomic and race-free, unlike the
    # os.path.exists() check followed by os.makedirs().
    os.makedirs(directory, exist_ok=True)
# Download a single image.
def download_image(url, folder_path):
    """Stream the image at *url* into a file inside *folder_path*.

    The local file name is taken from the last path segment of the URL.
    Failures are printed rather than raised, so one bad image does not
    abort a whole scrape.

    Args:
        url: Absolute URL of the image to fetch.
        folder_path: Existing directory to write the file into.
    """
    try:
        # stream=True writes large images in chunks instead of holding
        # them in memory; timeout prevents a dead host from hanging us.
        response = requests.get(url, stream=True, timeout=10)
        if response.status_code == 200:
            # Derive the file name from the URL's last segment; fall back
            # to a placeholder when the URL ends with "/".
            name = url.split("/")[-1] or "image"
            filename = os.path.join(folder_path, name)
            with open(filename, 'wb') as file:
                for chunk in response.iter_content(1024):
                    file.write(chunk)
            # Bug fix: the original printed the literal "(unknown)"
            # instead of the saved file's path.
            print(f"图片下载成功: {filename}")
        else:
            print(f"图片下载失败: {url}")
    except (requests.RequestException, OSError) as e:
        # Narrowed from bare Exception: only network and file errors
        # are expected here; anything else should surface as a bug.
        print(f"下载过程中出现错误: {e}")
# Scrape all images referenced by a web page.
def scrape_images(url, folder_path="downloaded_images"):
    """Download every image referenced by an <img src=...> tag on *url*.

    Args:
        url: URL of the page to scrape.
        folder_path: Directory to save images into (created if missing).

    Raises:
        requests.RequestException: if the page itself cannot be fetched.
            Individual image failures are reported by download_image and
            do not abort the scrape.
    """
    create_directory(folder_path)

    # Fetch the page. Time out rather than hang, and fail loudly on an
    # HTTP error instead of silently parsing an error page's HTML.
    response = requests.get(url, timeout=10)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    # Walk every <img> tag, resolve its src, and download it.
    for img in soup.find_all('img'):
        img_url = img.get('src')
        if not img_url:
            # Some <img> tags carry no src (e.g. lazy-loading
            # placeholders) — skip them.
            continue

        # Convert to an absolute URL (some sites use relative paths).
        img_url = urljoin(url, img_url)

        download_image(img_url, folder_path)
# Example usage — guarded so importing this module does not
# immediately trigger a network scrape.
if __name__ == "__main__":
    url = "https://lcdd.net"  # 你想要爬取的网页URL
    scrape_images(url)