You need to use the jsoup-1.7.3.jar package. If you need to see the document, I will download it. Please take a step to the official website
Here I will post the test of the Java project I used. Code
package com.javen.Jsoup; import java.io.IOException; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; public class JsoupTest { static String url="http://www.cnblogs.com/zyw-205520/archive/2012/12/20/2826402.html"; /** * @param args * @throws Exception */ public static void main(String[] args) throws Exception { // TODO Auto-generated method stub BolgBody(); //test(); //Blog(); /* * Document doc = Jsoup.connect("http://www.oschina.net/") * .data("query", "Java") // 请求参数 .userAgent("I ’ m jsoup") // 设置 * User-Agent .cookie("auth", "token") // 设置 cookie .timeout(3000) // * 设置连接超时时间 .post(); */// 使用 POST 方法访问 URL /* * // 从文件中加载 HTML 文档 File input = new File("D:/test.html"); Document doc * = Jsoup.parse(input,"UTF-8","http://www.oschina.net/"); */ } /** * 获取指定HTML 文档指定的body * @throws IOException */ private static void BolgBody() throws IOException { // 直接从字符串中输入 HTML 文档 String html = "<html><head><title> 开源中国社区 </title></head>" + "<body><p> 这里是 jsoup 项目的相关文章 </p></body></html>"; Document doc = Jsoup.parse(html); System.out.println(doc.body()); // 从 URL 直接加载 HTML 文档 Document doc2 = Jsoup.connect(url).get(); String title = doc2.body().toString(); System.out.println(title); } /** * 获取博客上的文章标题和链接 */ public static void article() { Document doc; try { doc = Jsoup.connect("http://www.cnblogs.com/zyw-205520/").get(); Elements ListDiv = doc.getElementsByAttributeValue("class","postTitle"); for (Element element :ListDiv) { Elements links = element.getElementsByTag("a"); for (Element link : links) { String linkHref = link.attr("href"); String linkText = link.text().trim(); System.out.println(linkHref); System.out.println(linkText); } } } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } } /** * 获取指定博客文章的内容 */ public static void Blog() { Document doc; try { doc = Jsoup.connect("http://www.cnblogs.com/zyw-205520/archive/2012/12/20/2826402.html").get(); Elements ListDiv = doc.getElementsByAttributeValue("class","postBody"); for (Element element :ListDiv) { System.out.println(element.html()); } } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } } }
The following is an introduction to the use of Jsoup in android to asynchronously parse web page data. Please note: It is easy to encounter a garbled stable
configuration here. File : Add permissions to AndroidManifest.xml
<uses-permission android:name="android.permission.INTERNET"></uses-permission>
Layout file
<LinearLayout xmlns:android="http://schemas.android.com/apk/res/android" xmlns:tools="http://schemas.android.com/tools" android:layout_width="match_parent" android:layout_height="match_parent" android:orientation="vertical" > <WebView android:id="@+id/webView" android:layout_width="fill_parent" android:layout_height="200dp" /> <ScrollView android:layout_width="wrap_content" android:layout_height="wrap_content" > <TextView android:id="@+id/textView" android:layout_width="wrap_content" android:layout_height="wrap_content" android:text="@string/hello_world" /> </ScrollView> </LinearLayout>
The code that mainly loads data asynchronously
package com.javen.aaa; import java.io.BufferedReader; import java.io.IOException; import java.io.InputStreamReader; import java.net.URL; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; import android.app.Activity; import android.app.Dialog; import android.app.ProgressDialog; import android.os.AsyncTask; import android.os.Bundle; import android.util.Log; import android.webkit.WebView; import android.widget.TextView; public class MainActivity extends Activity { private WebView webView; private TextView textView; private static final int DIALOG_KEY = 0; @Override protected void onCreate(Bundle savedInstanceState) { super.onCreate(savedInstanceState); setContentView(R.layout.main); webView = (WebView) findViewById(R.id.webView); textView=(TextView) findViewById(R.id.textView); try { ProgressAsyncTask asyncTask=new ProgressAsyncTask(webView,textView); asyncTask.execute(10000); } catch (Exception e) { // TODO Auto-generated catch block e.printStackTrace(); } } public String test() { StringBuffer buffer=new StringBuffer(); Document doc; try { doc = Jsoup.connect("http://www.cnblogs.com/zyw-205520/").get(); Elements ListDiv = doc.getElementsByAttributeValue("class","postTitle"); for (Element element :ListDiv) { Elements links = element.getElementsByTag("a"); for (Element link : links) { String linkHref = link.attr("href"); String linkText = link.text().trim(); buffer.append("linkHref=="+linkHref); buffer.append("linkText=="+linkText); System.out.println(linkHref); System.out.println(linkText); } } } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } return buffer.toString(); } // 弹出"查看"对话框 @Override protected Dialog onCreateDialog(int id) { switch (id) { case DIALOG_KEY: { ProgressDialog dialog = new ProgressDialog(this); dialog.setMessage("获取数据中 请稍候..."); dialog.setIndeterminate(true); dialog.setCancelable(true); return dialog; } } return null; } public static String readHtml(String myurl) { StringBuffer sb = new StringBuffer(""); URL url; try { url = new URL(myurl); BufferedReader br = new BufferedReader(new InputStreamReader(url.openStream(), "gbk")); String s = ""; while ((s = br.readLine()) != null) { sb.append(s + "\r\n"); } } catch (Exception e) { e.printStackTrace(); } return sb.toString(); } class ProgressAsyncTask extends AsyncTask<Integer, Integer, String> { private WebView webView; private TextView textView; public ProgressAsyncTask(WebView webView,TextView textView) { super(); this.webView=webView; this.textView=textView; } /** * 这里的Integer参数对应AsyncTask中的第一个参数 这里的String返回值对应AsyncTask的第三个参数 * 该方法并不运行在UI线程当中,主要用于异步操作,所有在该方法中不能对UI当中的空间进行设置和修改 * 但是可以调用publish Progress方法触发onProgressUpdate对UI进行操作 */ @Override protected String doInBackground(Integer... params) { String str =null; Document doc = null; try { // String url ="http://www.cnblogs.com/zyw-205520/p/3355681.html"; // // doc= Jsoup.parse(new URL(url).openStream(),"utf-8", url); // //doc = Jsoup.parse(readHtml(url)); // //doc=Jsoup.connect(url).get(); // str=doc.body().toString(); doc = Jsoup.connect("http://www.cnblogs.com/zyw-205520/archive/2012/12/20/2826402.html").get(); Elements ListDiv = doc.getElementsByAttributeValue("class","postBody"); for (Element element :ListDiv) { str=element.html(); System.out.println(element.html()); } Log.d("doInBackground", str.toString()); System.out.println(str); //你可以试试GBK或UTF-8 } catch (Exception e) { // TODO Auto-generated catch block e.printStackTrace(); } return str.toString() ; //return test(); } /** * 这里的String参数对应AsyncTask中的第三个参数(也就是接收doInBackground的返回值) * 在doInBackground方法执行结束之后在运行,并且运行在UI线程当中 可以对UI空间进行设置 */ @Override protected void onPostExecute(String result) { webView.loadData(result, "text/html;charset=utf-8", null); textView.setText(result); removeDialog(DIALOG_KEY); } // 该方法运行在UI线程当中,并且运行在UI线程当中 可以对UI空间进行设置 @Override protected void onPreExecute() { showDialog(DIALOG_KEY); } /** * 这里的Intege参数对应AsyncTask中的第二个参数 * 在doInBackground方法当中,,每次调用publishProgress方法都会触发onProgressUpdate执行 * onProgressUpdate是在UI线程中执行,所有可以对UI空间进行操作 */ @Override protected void onProgressUpdate(Integer... values) { } } }
The above is Use Jsoup to capture the data content of the page. For more related content, please pay attention to the PHP Chinese website (www.php.cn)!

HTMLtagsdefinethestructureofawebpage,whileattributesaddfunctionalityanddetails.1)Tagslike,,andoutlinethecontent'splacement.2)Attributessuchassrc,class,andstyleenhancetagsbyspecifyingimagesources,styling,andmore,improvingfunctionalityandappearance.

The future of HTML will develop in a more semantic, functional and modular direction. 1) Semanticization will make the tag describe the content more clearly, improving SEO and barrier-free access. 2) Functionalization will introduce new elements and attributes to meet user needs. 3) Modularity will support component development and improve code reusability.

HTMLattributesarecrucialinwebdevelopmentforcontrollingbehavior,appearance,andfunctionality.Theyenhanceinteractivity,accessibility,andSEO.Forexample,thesrcattributeintagsimpactsSEO,whileonclickintagsaddsinteractivity.Touseattributeseffectively:1)Usese

The alt attribute is an important part of the tag in HTML and is used to provide alternative text for images. 1. When the image cannot be loaded, the text in the alt attribute will be displayed to improve the user experience. 2. Screen readers use the alt attribute to help visually impaired users understand the content of the picture. 3. Search engines index text in the alt attribute to improve the SEO ranking of web pages.

The roles of HTML, CSS and JavaScript in web development are: 1. HTML is used to build web page structure; 2. CSS is used to beautify the appearance of web pages; 3. JavaScript is used to achieve dynamic interaction. Through tags, styles and scripts, these three together build the core functions of modern web pages.

Setting the lang attributes of a tag is a key step in optimizing web accessibility and SEO. 1) Set the lang attribute in the tag, such as. 2) In multilingual content, set lang attributes for different language parts, such as. 3) Use language codes that comply with ISO639-1 standards, such as "en", "fr", "zh", etc. Correctly setting the lang attribute can improve the accessibility of web pages and search engine rankings.

HTMLattributesareessentialforenhancingwebelements'functionalityandappearance.Theyaddinformationtodefinebehavior,appearance,andinteraction,makingwebsitesinteractive,responsive,andvisuallyappealing.Attributeslikesrc,href,class,type,anddisabledtransform

TocreatealistinHTML,useforunorderedlistsandfororderedlists:1)Forunorderedlists,wrapitemsinanduseforeachitem,renderingasabulletedlist.2)Fororderedlists,useandfornumberedlists,customizablewiththetypeattributefordifferentnumberingstyles.


Hot AI Tools

Undresser.AI Undress
AI-powered app for creating realistic nude photos

AI Clothes Remover
Online AI tool for removing clothes from photos.

Undress AI Tool
Undress images for free

Clothoff.io
AI clothes remover

Video Face Swap
Swap faces in any video effortlessly with our completely free AI face swap tool!

Hot Article

Hot Tools

Dreamweaver Mac version
Visual web development tools

SublimeText3 Mac version
God-level code editing software (SublimeText3)

WebStorm Mac version
Useful JavaScript development tools

Atom editor mac version download
The most popular open source editor

DVWA
Damn Vulnerable Web App (DVWA) is a PHP/MySQL web application that is very vulnerable. Its main goals are to be an aid for security professionals to test their skills and tools in a legal environment, to help web developers better understand the process of securing web applications, and to help teachers/students teach/learn in a classroom environment Web application security. The goal of DVWA is to practice some of the most common web vulnerabilities through a simple and straightforward interface, with varying degrees of difficulty. Please note that this software
