import os
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin


def create_directory(directory):
    """Create *directory* (including parents) if it does not already exist."""
    # exist_ok avoids the check-then-create race of the original LBYL version.
    os.makedirs(directory, exist_ok=True)


def download_image(url, folder_path):
    """Download a single image from *url* into *folder_path*.

    Streams the response in 1 KiB chunks so large images are never held
    fully in memory. Errors are reported but never raised, so one bad
    image does not abort the whole scrape.
    """
    try:
        # Timeout prevents a stalled server from hanging the scraper forever.
        response = requests.get(url, stream=True, timeout=10)
        if response.status_code == 200:
            # Derive the local file name from the last path segment and
            # drop any query string (e.g. "pic.jpg?w=300" -> "pic.jpg").
            name = url.split("/")[-1].split("?")[0]
            if not name:
                # URL ends with "/": no usable file name, skip it.
                print(f"图片下载失败: {url}")
                return
            filename = os.path.join(folder_path, name)
            with open(filename, 'wb') as file:
                for chunk in response.iter_content(1024):
                    file.write(chunk)
            # Bug fix: the original printed the literal text "(unknown)"
            # instead of interpolating the saved file path.
            print(f"图片下载成功: {filename}")
        else:
            print(f"图片下载失败: {url}")
    except Exception as e:
        print(f"下载过程中出现错误: {e}")


def scrape_images(url, folder_path="downloaded_images"):
    """Download every <img> found on the page at *url* into *folder_path*."""
    create_directory(folder_path)
    # Fetch and parse the page HTML.
    response = requests.get(url, timeout=10)
    soup = BeautifulSoup(response.text, 'html.parser')
    # Find all <img> tags, resolve each src against the page URL
    # (many sites use relative paths), and download it.
    for img in soup.find_all('img'):
        img_url = img.get('src')
        if not img_url:
            continue
        download_image(urljoin(url, img_url), folder_path)


if __name__ == "__main__":
    # Example usage — guarded so importing this module no longer
    # triggers a network scrape as a side effect.
    url = "https://lcdd.net"  # the page whose images you want to scrape
    scrape_images(url)