這篇文章帶給大家的內容是關於Java如何取得http和https網址對應html資料(附程式碼),有一定的參考價值,有需要的朋友可以參考一下,希望對你有幫助。
由於先前在公司一直用的C#做的軟體開發,近些天有同學需要用Java來做一個從指定網址取得資訊的Java程式。正好不是很難,順便複習了一下Java的知識。
要求如下,在https://www.marinetraffic.com/en/ais/details/ships/shipid:650235/mmsi:414726000/vessel:YU MING網址中得到如下圖框中標記的數據。
程式如下:GetWebPosition類別是主要程式類別
package yinhang.wang; import java.io.BufferedReader; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.net.URL; import java.util.regex.Matcher; import java.util.regex.Pattern; import javax.net.ssl.HttpsURLConnection; import javax.net.ssl.SSLContext; import javax.net.ssl.SSLSocketFactory; import javax.net.ssl.TrustManager; public class GetWebPosition { /** * @param args */ public static void main(String[] args) { // TODO Auto-generated method stub String info = GetDataByTwo(); System.out.println(info); } // 从指定的url中获取数据 //https://www.marinetraffic.com/en/ais/details/ships/shipid:650235/mmsi:414726000/vessel:YU%20MING private static String HttpRequest(String requestUrl) { StringBuffer buffer = null; BufferedReader bufferedReader = null; InputStreamReader inputStreamReader = null; InputStream inputStream = null; HttpsURLConnection httpUrlConn = null; // 建立并向网页发送请求 try { TrustManager[] tm = { new MyX509TrustManager() }; SSLContext sslContext = SSLContext.getInstance("SSL", "SunJSSE"); sslContext.init(null, tm, new java.security.SecureRandom()); // 从上述SSLContext对象中得到SSLSocketFactory对象 SSLSocketFactory ssf = sslContext.getSocketFactory(); URL url = new URL(requestUrl); // 描述状态 httpUrlConn = (HttpsURLConnection) url.openConnection(); httpUrlConn.setSSLSocketFactory(ssf); httpUrlConn .setRequestProperty("User-Agent", "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36)"); //防止报403错误。 httpUrlConn.setDoOutput(true); httpUrlConn.setDoInput(true); httpUrlConn.setUseCaches(false); // 请求的类型 httpUrlConn.setRequestMethod("GET"); // 获取输入流 inputStream = httpUrlConn.getInputStream(); inputStreamReader = new InputStreamReader(inputStream, "utf-8"); bufferedReader = new BufferedReader(inputStreamReader); // 从输入流读取结果 buffer = new StringBuffer(); String str = null; while ((str = bufferedReader.readLine()) != null) { buffer.append(str); } } catch (Exception e) { e.printStackTrace(); } finally { // 释放资源 if (bufferedReader != null) { try { bufferedReader.close(); } catch (IOException e) { e.printStackTrace(); } } if (inputStreamReader != null) { try { inputStreamReader.close(); } catch (IOException e) { e.printStackTrace(); } } if (inputStream != null) { try { inputStream.close(); } catch (IOException e) { e.printStackTrace(); } } if (httpUrlConn != null) { httpUrlConn.disconnect(); } } return buffer.toString(); }
private static String HtmlFiter(String html) { StringBuffer buffer = new StringBuffer(); String str1 = ""; String str2 = ""; //取出所用的范围, //Pattern p = Pattern.compile("(.*)(<p class=\"panel panel-primary no-border vertical-offset-20\">)(.*)(</p>)(.*)"); Pattern p = Pattern.compile("(.*)(</script>)(.*)(<p class=\"wind_icon wind_low\")(.*)"); Matcher m = p.matcher(html); if (m.matches()) { str1 = m.group(3); //取得时间:Vessel's Local Time: p = Pattern.compile("(.*)(time datetime=\")(.*)(\">)(.*)(</time>)(.*)(</span></strong>)(.*)"); m = p.matcher(str1); if (m.matches()) { str2 = m.group(5); String str3 = m.group(7); buffer.append("\nVessel's Local Time: "); buffer.append(str2); buffer.append(str3); } // <a href="/en/ais/home/centerx:120.3903/centery:32.02979/zoom:10/mmsi:414726000/shipid:650235" // class="details_data_link">32.02979° / 120.3903°</a> //取得当前经纬度:Latitude / Longitude: p = Pattern.compile("(.*)(class=\"details_data_link\">)(.*)(</a></strong></span>)(.*)"); m = p.matcher(str1); if (m.matches()) { str2 = m.group(3); buffer.append("\nLatitude / Longitude: "); buffer.append(str2); } //取得当前速度航线Speed/Course: p = Pattern.compile("(.*)(<span><strong>)(.*)(</strong></span>)(.*)"); m = p.matcher(str1); if (m.matches()) { str2 = m.group(3); buffer.append("\nSpeed/Course: "); buffer.append(str2); } } return buffer.toString(); } //封裝上述两个方法 public static String GetDataByTwo(){ //调用第一个方法,获得html字符串 String html = HttpRequest("https://www.marinetraffic.com/en/ais/details/ships/shipid:650235/mmsi:414726000/vessel:YU%20MING"); //调用第二个方法,过滤掉无用的信息 String result = HtmlFiter(html); return result; } }
MyX509TrustManager這個類別的作用是提供安全性憑證去存取https類型的網站
package yinhang.wang; import java.security.cert.CertificateException; import java.security.cert.X509Certificate; import javax.net.ssl.X509TrustManager; public class MyX509TrustManager implements X509TrustManager { public void checkClientTrusted(X509Certificate[] chain, String authType) throws CertificateException { } public void checkServerTrusted(X509Certificate[] chain, String authType) throws CertificateException { } public X509Certificate[] getAcceptedIssuers() { return null; } }
希望初步學習正規表示式和爬資料的小夥伴們能夠用到。
以上是Java如何取得http和https網址對應html資料(附程式碼)的詳細內容。更多資訊請關注PHP中文網其他相關文章!