文章详情页

Java 根据网络URL获取该网页上面所有的img标签并下载图片

浏览：89 日期：2022-08-21 14:55:50

说明：根据网络URL获取该网页上面所有的img标签并下载符合要求的所有图片

所需jar包：jsoup.jar

import java.io.BufferedInputStream;import java.io.BufferedOutputStream;import java.io.File;import java.io.FileOutputStream;import java.io.IOException;import java.io.InputStream;import java.io.OutputStream;import java.net.MalformedURLException;import java.net.URL;import java.net.URLConnection;import java.util.ArrayList;import java.util.List;import java.util.UUID;import org.jsoup.Jsoup;import org.jsoup.nodes.Document;import org.jsoup.nodes.Element;import org.jsoup.select.Elements;/** * 图片批量下载工具类 * @author Marydon * @create time 2016-9-3下午2:01:03 * @update time 2017年9月30日11:07:02 * @E-mail:dellshouji@163.com */public class ImgDownloadUtil { /** * 根据URL获取网页DOM对象 * @param url * 网址 * @return DOM对象 */ public static Document getHtmlDocument(String url) { Document document = null; URL urlObj = null; try { // 1.建立网络连接 urlObj = new URL(url); // 2.根据url获取Document对象 document = Jsoup.parse(urlObj, 5000);// 单位：毫秒超时时间 } catch (MalformedURLException e) { System.out.println('世界上最遥远的距离就是没有网，检查设置！'); e.printStackTrace(); } catch (IOException e) { System.out.println('您的网络连接打开失败，请稍后重试！'); e.printStackTrace(); } return document; } /** * 根据URL获取网页源码 * @param url * 网址 * @return 网页源码 */ public static String getHtmlText(String url) { String htmlText = ''; Document document = null; URL urlObj = null; try { // 1.建立网络连接 urlObj = new URL(url); // 2.根据url获取Document对象 document = Jsoup.parse(urlObj, 5000);// 单位：毫秒超时时间 // 3.根据dom对象获取网页源码 htmlText = document.html(); } catch (MalformedURLException e) { System.out.println('世界上最遥远的距离就是没有网，检查设置！'); e.printStackTrace(); } catch (IOException e) { System.out.println('您的网络连接打开失败，请稍后重试！'); e.printStackTrace(); } return htmlText; } /** * 操作Dom对象获取图片地址 * @param document * Dom对象 * @return 图片地址集合 */ public static List<String> getImgAddressByDom(Document document) { // 用于存储图片地址 List<String> imgAddress = new ArrayList<String>(); if (null != document) { // <img src='https://www.haobala.com/bcjs/5670.html' alt='' width='' height=''/> // 获取页面上所有的图片元素 Elements elements 
= document.getElementsByTag('img'); String imgSrc = ''; // 迭代获取图片地址 for (Element el : elements) {imgSrc = el.attr('src');// imgSrc的内容不为空，并且以http://开头if ((!imgSrc.isEmpty()) && imgSrc.startsWith('http://')) { // 将有效图片地址添加到List中 imgAddress.add(imgSrc);} } } return imgAddress; } /** * 根据网络URL下载文件 * @param url * 文件所在地址 * @param fileName * 指定下载后该文件的名字 * @param savePath * 文件保存根路径 */ public static void downloadFileByUrl(String url, String fileName, String savePath) { URL urlObj = null; URLConnection conn = null; InputStream inputStream = null; BufferedInputStream bis = null; OutputStream outputStream = null; BufferedOutputStream bos = null; try { // 1.建立网络连接 urlObj = new URL(url); // 2.打开网络连接 conn = urlObj.openConnection(); // 设置超时间为3秒 conn.setConnectTimeout(3 * 1000); // 防止屏蔽程序抓取而返回403错误 conn.setRequestProperty('User-Agent', 'Mozilla/4.0 (compatible; MSIE 5.0; Windows NT; DigExt)'); // 3.得到输入流 inputStream = conn.getInputStream(); bis = new BufferedInputStream(inputStream); // 文件保存位置 File saveDir = new File(savePath); if (!saveDir.exists()) {saveDir.mkdirs(); } // 文件的绝对路径 String filePath = savePath + File.separator + fileName; File file = new File(filePath); // 4. 
outputStream = new FileOutputStream(file); bos = new BufferedOutputStream(outputStream); byte[] b = new byte[1024]; int len = 0; while ((len = bis.read(b)) != -1) {bos.write(b, 0, len); } System.out.println('info:' + url + ' download success,fileRename=' + fileName); } catch (MalformedURLException e) { System.out.println('世界上最遥远的距离就是没有网，检查设置'); System.out.println('info:' + url + ' download failure'); e.printStackTrace(); } catch (IOException e) { System.out.println('您的网络连接打开失败，请稍后重试！'); System.out.println('info:' + url + ' download failure'); e.printStackTrace(); } finally {// 关闭流 try {if (bis != null) {// 关闭字节缓冲输入流 bis.close();}if (inputStream != null) {// 关闭字节输入流 inputStream.close();}if (bos != null) {// 关闭字节缓冲输出流 bos.close();}if (outputStream != null) {// 关闭字节输出流 outputStream.close();} } catch (IOException e) {e.printStackTrace(); } } }}

测试

public static void main(String[] args) { // 1.确定网址 String url = 'http://www.cnblogs.com/Marydon20170307/p/7402871.html'; // 2.获取该网页的Dom对象 Document document = getHtmlDocument(url); // 3.获取该网页所有符合要求的图片地址 List<String> imgAddresses = getImgAddressByDom(document); String imgName = ''; String imgType = ''; // 4.设置图片保存路径 String savePath = 'C:/Users/Marydon/Desktop'; // 5.批量下载图片 for (String imgSrc : imgAddresses) { // 5.1图片命名：图片名用32位字符组成的唯一标识 imgName = UUID.randomUUID().toString().replace('-', ''); // 5.2图片格式（类型） imgType = imgSrc.substring(imgSrc.lastIndexOf('.')); imgName += imgType; // 5.3下载该图片 downloadFileByUrl(imgSrc, imgName, savePath); }}

以上就是Java 根据网络URL获取该网页上面所有的img标签并下载图片的详细内容，更多关于java 下载网络图片的资料请关注好吧啦网其它相关文章！

Java

上一条：Java调用WebService接口作测试 下一条：java 如何读取properties文件

相关文章：

1. Xml简介_动力节点Java学院整理2. 三个不常见的 HTML5 实用新特性简介3. html清除浮动的6种方法示例4. 在JSP中使用formatNumber控制要显示的小数位数方法5. CSS3使用过度动画和缓动效果案例讲解6. XML解析错误：未组织好的解决办法7. 使用css实现全兼容tooltip提示框8. XHTML 1.0：标记新的开端9. CSS3实例分享之多重背景的实现(Multiple backgrounds)10. ASP基础入门第三篇(ASP脚本基础)

排行榜

					
					Django正则URL匹配实现流程解析
idea设置代码格式化的方法步骤
springboot+vue实现登录功能
IntelliJ IDEA导出项目的方法
python 制作网站小说下载器
解决vue axios跨域 Request Method: OPTIONS问题(预检请求)
Python实现图片指定位置加图片水印（附Pyinstaller打包exe)
PHP中使用pthread拓展
XML解析错误：未组织好 的解决办法
python小白切忌乱用表达式
解析使用useDark(),发现transition 动画失效
				

热门标签