本实例演示如何从我的一篇文章页面中读取标题。
通过 class 属性锁定标题元素,把匹配的内容打印出来。
下面是源码:
代码语言:java
package com.test.test;
import java.io.*;
import java.net.URL;
import java.net.URLConnection;
/**
 * Demo that downloads a web page and prints every line of its HTML that
 * contains the title element, located by its {@code class} attribute.
 */
public class WebHtmlTest {

    /** Address of the page whose HTML is scanned. */
    private static final String PAGE_URL = "https://lanzao.blog.csdn.net/article/details/119329989";

    /** Attribute text used to recognise the title element in the page source. */
    private static final String TITLE_MARKER = "class=\"title-article\"";

    /** Timeout, in milliseconds, applied to both connecting and reading. */
    private static final int TIMEOUT_MILLIS = 20000;

    /**
     * Fetches {@link #PAGE_URL}, decodes the response as UTF-8 and prints each
     * line containing {@link #TITLE_MARKER} to standard output.
     *
     * <p>I/O failures are reported on standard error instead of being
     * propagated; the {@code throws} clause is kept only so the signature
     * stays unchanged.
     *
     * @param args command-line arguments (unused)
     * @throws IOException declared for signature compatibility
     */
    public static void main(String[] args) throws IOException {
        try {
            URLConnection connection = new URL(PAGE_URL).openConnection();
            // Per-connection timeouts; must be set before the stream is opened.
            connection.setConnectTimeout(TIMEOUT_MILLIS);
            connection.setReadTimeout(TIMEOUT_MILLIS);

            // try-with-resources closes the reader (and the wrapped streams)
            // even when reading fails part-way through.
            try (BufferedReader reader = new BufferedReader(new InputStreamReader(
                    connection.getInputStream(), java.nio.charset.StandardCharsets.UTF_8))) {
                printMatchingLines(reader, TITLE_MARKER);
            }
        } catch (IOException e) {
            System.err.println("Failed to read " + PAGE_URL);
            e.printStackTrace();
        }
    }

    /**
     * Prints every line read from {@code reader} that contains {@code marker}.
     *
     * @param reader source of the text lines; not closed by this method
     * @param marker substring a line must contain to be printed
     * @throws IOException if reading from {@code reader} fails
     */
    private static void printMatchingLines(BufferedReader reader, String marker) throws IOException {
        String line;
        while ((line = reader.readLine()) != null) {
            if (line.contains(marker)) {
                System.out.println(line);
            }
        }
    }
}