I have to marvel at the flexibility and power of parsing HTML directly through its DOM tree: HTML uses only a handful of common elements, and the nodes of interest can be located by their ID or CLASS attributes.
When parsing HTML files this way, you can do without regular expressions altogether. After all, HTML files contain many similar patterns, and DOM-based code makes its purpose obvious. Of course, regular expressions are very powerful and have a wider range of applications.
The code is as follows:
<?php
// Suppress the warnings raised while loading real-world markup (for example
// pages that embed JavaScript); only fatal and parse errors are reported.
error_reporting(E_ERROR | E_PARSE);

/**
 * Extracts the sections of a dictionary result page from its HTML DOM tree.
 *
 * The constructor loads the document, walks every <div>, and dispatches on the
 * exact (trimmed) value of its class attribute. Each recognised section is
 * flattened into a list of strings that the public get*Meaning() accessors
 * expose.
 */
class DomTree
{
    // DOM handle
    private $doc = null;
    // basic meanings
    private $basic_meaning = array();
    // English-Chinese bilingual definitions
    private $en_or_ch = array();
    // English-English definitions
    private $en_to_en = array();
    // example sentences
    private $example = array();
    // common sentence patterns
    private $sentences = array();
    // glossary / collocations
    private $glossary = array();
    // classic quotations
    private $auth = array();
    // common usage mistakes
    private $use_in_wrong = array();
    // near-synonyms
    private $approximate_words = array();
    // encyclopedia explanation
    private $baike_trans = array();

    /**
     * Loads the document and collects every recognised section.
     *
     * @param string $source An http/https/ftp URL, a path to an existing file,
     *                       or a string of HTML markup. A string that is
     *                       neither a URL nor an existing file is parsed as
     *                       markup.
     *
     * Terminates the script via die() when $source is not a string, is empty,
     * or names a URL that cannot be read.
     */
    public function __construct($source)
    {
        $this->doc = new DOMDocument();

        if (!is_string($source)) {
            die("不支持的资源类型");
        }
        if ($source === '') {
            die("传入的字符串不能为空");
        }

        // The URL test must come before the generic string handling: a URL is
        // itself a string and would otherwise be parsed as literal markup.
        if (preg_match('#^(https?|ftp)://#i', $source)) {
            $html = file_get_contents($source);
            if ($html === false || $html === '') {
                die("无法读取资源");
            }
            $this->doc->loadHTML($html);
        } elseif (is_file($source)) {
            $this->doc->loadHTMLFile($source);
        } else {
            $this->doc->loadHTML($source);
        }

        // Dispatch every <div> on the exact value of its class attribute.
        foreach ($this->doc->getElementsByTagName("div") as $div) {
            if (!$div->hasAttribute("class")) {
                continue;
            }
            switch (trim($div->getAttribute("class"))) {
                case "basic clearfix":
                    $this->getBasicMeans($div);
                    break;
                case "layout dual":
                    $this->getEnOrCh($div);
                    break;
                case "layout en":
                    $this->getEnToEn($div);
                    break;
                case "layout sort":
                    $this->getExample($div);
                    break;
                case "layout patt":
                    $this->normalSentence($div);
                    break;
                case "layout coll":
                    $this->getGlossary($div);
                    break;
                case "layout auth":
                    $this->getAuth($div);
                    break;
                case "layout comn":
                    $this->useInWrong($div);
                    break;
                case "layout nfw":
                    $this->getApproximateWords($div);
                    break;
                case "layout baike":
                    $this->getBaike($div);
                    break;
            }
        }
    }

    /**
     * Returns the text of every <li> found inside every $list_tag element
     * below $div_elem, in document order.
     *
     * @param DOMElement $div_elem  Section container.
     * @param string     $list_tag  Tag name of the list wrapper ("ol" or "ul").
     * @param bool       $normalize Whether to pass each value through strip_Empty().
     * @return array List of strings.
     */
    private function collectListItems($div_elem, $list_tag, $normalize)
    {
        $values = array();
        foreach ($div_elem->getElementsByTagName($list_tag) as $list) {
            foreach ($list->getElementsByTagName("li") as $li) {
                $values[] = $normalize ? $this->strip_Empty($li->nodeValue) : $li->nodeValue;
            }
        }
        return $values;
    }

    // Basic meanings: the <strong> texts of every <li> that carries no style attribute.
    private function getBasicMeans($basic_div)
    {
        foreach ($basic_div->getElementsByTagName("li") as $item) {
            if ($item->hasAttribute("style")) {
                continue;
            }
            foreach ($item->getElementsByTagName("strong") as $strong) {
                $this->basic_meaning[] = $strong->nodeValue;
            }
        }
    }

    // English-Chinese bilingual definitions: raw text of every <li>.
    private function getEnOrCh($div_elem)
    {
        foreach ($div_elem->getElementsByTagName("li") as $li) {
            $this->en_or_ch[] = $li->nodeValue;
        }
    }

    // English-English definitions: whitespace-normalised text of every <li>.
    private function getEnToEn($div_elem)
    {
        foreach ($div_elem->getElementsByTagName("li") as $li) {
            $this->en_to_en[] = $this->strip_Empty($li->nodeValue);
        }
    }

    /**
     * Collapses every run of two or more whitespace characters into one space.
     *
     * @param mixed $string Value to normalise.
     * @return string|null The normalised text, or null when $string is not a string.
     */
    private function strip_Empty($string)
    {
        if (!is_string($string)) {
            return null;
        }
        return preg_replace('#\s{2,}#', ' ', $string);
    }

    // Example sentences: <ol> -> <li>.
    private function getExample($div_elem)
    {
        $this->example = array_merge($this->example, $this->collectListItems($div_elem, "ol", true));
    }

    // Common sentence patterns: <ol> -> <li>.
    private function normalSentence($div_elem)
    {
        $this->sentences = array_merge($this->sentences, $this->collectListItems($div_elem, "ol", true));
    }

    // Glossary / collocations: <ul> -> <li>.
    private function getGlossary($div_elem)
    {
        $this->glossary = array_merge($this->glossary, $this->collectListItems($div_elem, "ul", true));
    }

    // Quotations: <ul> -> <li>.
    private function getAuth($div_elem)
    {
        $this->auth = array_merge($this->auth, $this->collectListItems($div_elem, "ul", true));
    }

    // Common usage mistakes: <ol> -> <li>.
    private function useInWrong($div_elem)
    {
        $this->use_in_wrong = array_merge($this->use_in_wrong, $this->collectListItems($div_elem, "ol", true));
    }

    // Near-synonyms: text of every <a> inside <ul> -> <li>.
    private function getApproximateWords($div_elem)
    {
        foreach ($div_elem->getElementsByTagName("ul") as $ul) {
            foreach ($ul->getElementsByTagName("li") as $li) {
                foreach ($li->getElementsByTagName("a") as $anchor) {
                    $this->approximate_words[] = $anchor->nodeValue;
                }
            }
        }
    }

    // Encyclopedia explanation: raw text of <ul> -> <li> (not whitespace-normalised).
    private function getBaike($div_elem)
    {
        $this->baike_trans = array_merge($this->baike_trans, $this->collectListItems($div_elem, "ul", false));
    }

    // Every accessor below returns the collected list, or null when the
    // corresponding section was absent from the document.

    // Accessor: basic meanings
    public function getBasicMeaning()
    {
        return empty($this->basic_meaning) ? null : $this->basic_meaning;
    }

    // Accessor: English-Chinese bilingual definitions
    public function getEnOrChMeaning()
    {
        return empty($this->en_or_ch) ? null : $this->en_or_ch;
    }

    // Accessor: English-English definitions
    public function getEnToEnMeaning()
    {
        return empty($this->en_to_en) ? null : $this->en_to_en;
    }

    // Accessor: example sentences
    public function getExampleMeaning()
    {
        return empty($this->example) ? null : $this->example;
    }

    // Accessor: common sentence patterns
    public function getNormalSentenceMeaning()
    {
        return empty($this->sentences) ? null : $this->sentences;
    }

    // Accessor: glossary
    public function getGlossaryMeaning()
    {
        return empty($this->glossary) ? null : $this->glossary;
    }

    // Accessor: quotations
    public function getAuthMeaning()
    {
        return empty($this->auth) ? null : $this->auth;
    }

    // Accessor: common usage mistakes
    public function getUseInWrongMeaning()
    {
        return empty($this->use_in_wrong) ? null : $this->use_in_wrong;
    }

    // Accessor: near-synonyms
    public function getApproximateWordsMeaning()
    {
        return empty($this->approximate_words) ? null : $this->approximate_words;
    }

    // Accessor: encyclopedia explanation
    public function getBaikeMeaning()
    {
        return empty($this->baike_trans) ? null : $this->baike_trans;
    }

    /**
     * Returns every section keyed by name; a section that was not found maps to null.
     *
     * @return array
     */
    public function getAllMeaning()
    {
        $all_meaning = array();
        $all_meaning['basic_meaning'] = $this->getBasicMeaning();
        $all_meaning['en_or_ch'] = $this->getEnOrChMeaning();
        $all_meaning['en_to_en'] = $this->getEnToEnMeaning();
        $all_meaning['example'] = $this->getExampleMeaning();
        $all_meaning['normal_sentence'] = $this->getNormalSentenceMeaning();
        $all_meaning['glossary_sentence'] = $this->getGlossaryMeaning();
        $all_meaning['auth_sentence'] = $this->getAuthMeaning();
        $all_meaning['wrong_use'] = $this->getUseInWrongMeaning();
        $all_meaning['approximate_words'] = $this->getApproximateWordsMeaning();
        $all_meaning['baike_meaning'] = $this->getBaikeMeaning();
        return $all_meaning;
    }
}

$dom = new DomTree("./com.html");
$trans = $dom->getAllMeaning();
echo "<pre>";
print_r($trans);
?>
The result is as follows:
Array ( [basic_meaning] => Array ( [0] => 单词;消息;话语;诺言 [1] => 用词语表达 ) [en_or_ch] => Array ( [0] => [C] 字,词 the smallest unit of spoken language which has meaning and can stand alone [1] => [C] (说的)话,话语,言语 anything said; remark or statement [2] => [S] 消息,信息; 谣言 piece of news; message; rumour [3] => [S] 口令,号令; 命令 spoken command or signal [4] => [S] 诺言,保证 a promise [5] => vt. 用词语表达; 选用 express (sth) in particular words; phrase sth ) [en_to_en] => Array ( [0] => a unit of language that native speakers can identify; "words are the blocks from which sentences are made" "he hardly said ten words all morning" [1] => a brief statement; "he didn't say a word about it" [2] => information about recent and important events; "they awaited news of the outcome" [3] => a verbal command for action; "when I give the word, charge!" [4] => an exchange of views on some topic; "we had a good discussion" "we had a word or two about it" [5] => a promise; "he gave his word" [6] => a word is a string of bits stored in computer memory; "large computers use words up to 64 bits long" [7] => the divine word of God; the second person in the Trinity (incarnate in Jesus) [8] => a secret word or phrase known only to a restricted group; "he forgot the password" [9] => the sacred writings of the Christian religions; "he went to carry the Word to the heathen" [10] => put into words or an expression; "He formulated his concerns to the board of trustees" ) [example] => Array ( [0] => Could we have a word before you go to the meeting? 你去开会之前,咱们能私下说句话吗? [1] => My friend sent word that he was well. 我朋友捎来口信说他很好。 ) [normal_sentence] => Array ( [0] => What does this word mean? 这个词是什么意思? [1] => I couldn't look up the spelling of the word, as I hadn't a dictionary at hand. 我没法查这个词的拼写,因为我手边没有词典。 [2] => Many English words are derived from Latin. 许多英文单词源于拉丁文。 [3] => All the words beside the central idea should be crossed out. 凡偏离中心思想的词语都应通通删掉。 [4] => The editor eliminated slang words from the essay. 
编辑将俚语从这篇文章中剔除。 [5] => These words can't be staled by repetition. 这些词语不会因为经常使用而变成陈词滥调。 [6] => He gave me his visiting card, with a few words in pencil. 他把他的名片给我,上面有几个铅笔字。 [7] => I don't believe a word of his story. 他说的这件事我一句话都不相信。 [8] => At the press conference, the reporters copied down every word spoken by the prime minister. 在新闻发布会上,记者们逐字记下了首相的讲话。 [9] => Tell me what happened in your words. 用你自己的话把发生的事告诉我。 [10] => Deeds are better than words when people are in need of help. 当别人需要帮助时,行动胜于语言。 [11] => I would like a word with you. 我想和你谈谈。 [12] => After a word with the colonel he went away . 他和上校简单谈过之后就走了。 [13] => There's been no word from her for weeks. 已经有好几个星期没有她的音信了。 [14] => Word came that I was needed at home. 有信儿来说家里需要我。 [15] => Word has come that meeting will be held on Tuesday. 通知已到,星期二开会。 [16] => Word is that the election will be held in June. 有消息说选举将在六月份举行。 [17] => Word is that he's left the country. 据说他已经离开这个国家了。 [18] => Word got round that he had resigned. 谣传他已辞职。 [19] => Stay hidden until I give the word. 我不下令就藏着别动。 [20] => Their word is law. 他们的命令必须服从。 [21] => He gave the word and they let him in. 他说出了口令,他们让他进去了。 [22] => The word now is “freedom”. 现在的口号是“自由”。 [23] => I give you my word I'll go. 我向你保证,我会去的。 [24] => Stand by your word. 要守信用。 [25] => Hear The Word of God . 听宣讲《圣经》。 [26] => Be careful how you word your answer. 回答时要斟酌字句。 [27] => She worded the explanation well. 她的解释措辞得体。 [28] => The advice wasn't very tactfully worded. 这份通知措辞不太得体。 [29] => The suggestion might be worded more politely. 那项建议的措辞可以更婉转些。 [30] => This is a carefully worded contract. 
这是一份措辞严谨的合同。 ) [glossary_sentence] => Array ( [0] => address a few words 讲几句话 [1] => await word from sb 等待某人的消息 [2] => break one's words 食言 [3] => breathe a word 走漏消息 [4] => bring word 带来消息 [5] => choose a word 选择词 [6] => coin a word 杜撰一个词 [7] => cook up words 造新词 [8] => cross out a word 划掉一个词 [9] => cut out many words 删掉许多词 [10] => digest a word 消化一个词 [11] => doubt sb's words 怀疑某人的话 [12] => drink in all the words 吸收所有的词语 [13] => eat one's words 收回前言,认错,道歉 [14] => exchange angry words 发生口角 [15] => find words 找出言语(来表达) [16] => gain the good word of 博得…的赞扬 [17] => get word 得到消息 [18] => get a word 插嘴 [19] => give one's word 保证,允许 [20] => give the word 发出命令 [21] => have words together 争吵 [22] => have words with sb 与某人吵嘴 [23] => have a word with sb 同某人谈一谈 [24] => hunt up a word 查一个词 [25] => keep one's word 信守诺言 [26] => leave word 留言 [27] => leave out a word 省略一个词,丢掉一个词 [28] => look up a word (在词典里)查一个词 [29] => memorize words 记单词 [30] => play on words 玩弄字眼 [31] => pronounce a word 读一个词 [32] => put in words for 为…说几句话 [33] => put the words into sb's mouth 教某人怎么讲 [34] => quote a word 引用一个词 [35] => receive word of 收到…消息 [36] => regret one's words 为说过的话而后悔 [37] => respect one's word 遵守自己许下的诺言 [38] => say a word 说句话,进一步,走漏消息 [39] => say a few words 说几句话 [40] => say a good word for sb 为某人说好话 [41] => send sb a word 给某人捎个信儿 [42] => spell a word 拼写一个词 [43] => stress the word 重读那个词 [44] => take back one's word 收回自己的话 [45] => take sb's word for it 相信了某人的话 [46] => understand a word 理解某个词的意思 [47] => use words 用词 [48] => waste one's words 白费口舌 [49] => weigh words 斟酌词句 [50] => write a word 写一个词 [51] => advance word 事先传出的消息 [52] => angry words 气话 [53] => beautiful words 优美的言辞 [54] => big words 大话 [55] => borrowed word 外来词 [56] => broken words 断断续续的话 [57] => burning words 热情洋溢的话 [58] => choice words 精选的词句 [59] => colorful words 丰富的言辞 [60] => cross words 气话 [61] => empty words 空洞的话,无意义的话 [62] => everyday word 日常用语 [63] => farewell words 送别词 [64] => fighting words 容易引起争论的话,挑战性的话 [65] => 
foreign word 外来词 [66] => hard words 愤怒的话,激烈的话 [67] => heated word 激烈的言词,争吵时使用的话 [68] => high words 愤怒的话,激烈的话 [69] => hollow words 虚假的言语 [70] => honeyed words 甜言蜜语 [71] => hot words 激烈的言词,争吵时使用的话 [72] => household word 家喻户晓的词 [73] => irresponsible words 不负责任的话 [74] => key words 关键的字眼 [75] => last words 临终遗言 [76] => living words 现代语 [77] => meaningful words 意味深长的言语 [78] => meaningless words 无意义的话 [79] => misspelled word 拼错的词 [80] => native word 本国词,本地词 [81] => pleasant words 动听的语言 [82] => regional word 方言 [83] => scientific word 科学用语 [84] => semi-technical words 半科技词 [85] => sharp words 愤怒的话,激烈的话 [86] => simple word 简单的词 [87] => sincere words 真诚的话 [88] => small word 小词 [89] => spoken words 口头语 [90] => suggestive words 含蓄的话 [91] => sweet words 甜言蜜语 [92] => tearful parting words 伤感的离别之言 [93] => the latest word 最新消息,最后消息 [94] => uncleanly words 下流话 [95] => unfamiliar word 生词 [96] => unusual word 冷僻词 [97] => warm words 忿怒的话,激烈的话 [98] => written words 书面语 [99] => wrong words 错词 [100] => dictionary word 词典里出现的词 [101] => English words 英语单词 [102] => law word 法律用语 [103] => newspaper word 新闻用语 [104] => slang word 俚语 [105] => at a word 立即,立刻 [106] => in a word 简言之,总之 [107] => in one's own words 用自己的话说 [108] => in other words 换言之 [109] => upon my word 的确,真的 [110] => without a word 一声没吭 [111] => word in heavy type 黑体字 [112] => words in season 时宜的话 [113] => words of comfort 安慰的话 [114] => words of command 命令 [115] => words of complaint 怨言 [116] => the W- of God 圣经 [117] => words of praise 表扬的话 [118] => word of six letters 六个字母的词 [119] => words of thanks 感谢的话 [120] => word the explanation 解释 [121] => word accurately 准确地用言语表达 [122] => word crudely 简单地用词语〔语言〕表达 [123] => word felicitously 恰当地用言语表达 [124] => word intelligibly 清楚地用语言表达 [125] => word positively 明确地用词语表达 [126] => word vaguely 含糊地表达 [127] => word well 措辞得体 ) [auth_sentence] => Array ( [0] => Rome shall perishswrite that word In the blood that she has spilt. 出自:W. 
Cowper [1] => We have striven..to draw some word from her; but she..answers nothing. 出自:G. P. R. James [2] => To use his own words, he was in a cleft stick. 出自:H. Conway [3] => Actions speak louder than words. 出自:Proverb [4] => He words me, girls, he words me, that I should not Be noble to myself. 出自:Anthony Cleopatra,Shakespeare ) [wrong_use] => Array ( [0] => 我要跟他说句话。 误 I should like to have word with him. 正 I should like to have a word with him. [1] => 他们听到消息说足球比赛将在今晚电视实况转播。 误 They had a word that the football match would be televised live this evening. 正 They had word that the football match would be televised live this evening. 析 have word是“听到消息〔新闻〕”的意思,“说句话”是have a word。 [2] => 对逐词背课文,我感到厌倦。 误 I was tired of reciting the texts word after word. 正 I was tired of reciting the texts word for word. 析 “一字不变地,逐字(背诵或翻译)”是word for word,不是word after word。 [3] => 我说了什么错话吗? 误 Have I said any wrong words? 正 Have I said anything wrong? 析 误句语法上没有错,但不符合英语习惯。 [4] => 他不遵守诺言。 误 He broke his words. 正 He broke his word. 析 break one's word意为“不遵守诺言”, word在此短语中不用复数形式。 [5] => 我刚得知他到达的消息。 误 I have just received the word of his arrival. 正 I have just received word of his arrival. [6] => 有消息传来说我们的篮球队赢了这场比赛。 误 The word came that our basketball team had won the match. 正 Word came that our basketball team had won the match. 析 作“消息”“信息”解时, word前不加冠词。 [7] => 他大约是30年前开始当教师的,换句话说,他当教师已经有30年了。 误 He began to work as a teacher some thirty years ago, in another word, he has been a teacher for thirty years. 正 He began to work as a teacher some thirty years ago, in other words, he has been a teacher for thirty years. 析 in other words是固定短语,意为“换句话说”。 [8] => 他带信给我说怀特先生不久将动身去美国。 误 He carried me words that Mr.White would soon leave for America. 正 He carried me word that Mr. White would soon leave for America. 析 word作“消息”“信”解时,是不可数名词,其后不可加s。 [9] => 今晨我们争吵了。 误 We had a word this morning. 正 We had words this morning. [10] => 他们曾为鸡毛蒜皮的小事同邻居吵过嘴。 误 They had word with their neighbour over some trifles. 
正 They had words with their neighbours over some trifles. 析 表示“同某人发生口角”时,用have words with sb, words用复数形式。 [11] => 他说的大话使我们都感到惊讶。 误 His big word surprised us all. 正 His big words surprised us all. [12] => 我们绝不收回前言。 误 We should on no account eat our word. 正 We should on no account eat our words. 析 习语big words, eat one's words中, words词尾的s不可省。 ) [approximate_words] => Array ( [0] => account [1] => advice [2] => chat [3] => communication [4] => declaration [5] => edict [6] => expression [7] => message [8] => notice [9] => order [10] => password [11] => promise [12] => remark [13] => term [14] => couch [15] => explain [16] => express [17] => phrase [18] => put [19] => say [20] => write ) [baike_meaning] => Array ( [0] => word:Microsoft Word,属于办公软件,人们日常生活都有可能接触到他,对他并不陌生。 简介 wordMicrosoft Word是微软公司的一个文字处理器应用程序。它最初是由Richard Bro… ) )
The above shows how to parse the HTML DOM node tree in PHP, along with the relevant code. I hope it will be helpful to readers who are interested in PHP tutorials.

许多用户在选择智能手表的时候都会选择的华为的品牌,其中华为GT3pro和GT4都是非常热门的选择,不少用户都很好奇华为GT3pro和GT4有什么区别,下面就就给大家介绍一下二者。华为GT3pro和GT4有什么区别一、外观GT4:46mm和41mm,材质是玻璃表镜+不锈钢机身+高分纤维后壳。GT3pro:46.6mm和42.9mm,材质是蓝宝石玻璃表镜+钛金属机身/陶瓷机身+陶瓷后壳二、健康GT4:采用最新的华为Truseen5.5+算法,结果会更加的精准。GT3pro:多了ECG心电图和血管及安

为什么截图工具在Windows11上不起作用了解问题的根本原因有助于找到正确的解决方案。以下是截图工具可能无法正常工作的主要原因:对焦助手已打开:这可以防止截图工具打开。应用程序损坏:如果截图工具在启动时崩溃,则可能已损坏。过时的图形驱动程序:不兼容的驱动程序可能会干扰截图工具。来自其他应用程序的干扰:其他正在运行的应用程序可能与截图工具冲突。证书已过期:升级过程中的错误可能会导致此issu简单的解决方案这些适合大多数用户,不需要任何特殊的技术知识。1.更新窗口和Microsoft应用商店应用程

在之前的文章《实用Word技巧分享:聊聊你没用过的“行号”功能》中,我们了解了Word中你肯定没用过的"行号”功能。今天继续实用Word技巧分享,看看Excel表格怎么借用Word进行分栏打印,快来收藏使用吧!

在之前的文章《实用Word技巧分享:设置页码的终极方法!》中,我们学习了Word页码的设置方法。而今天我们来一起聊聊Word文本间距设置的几个技巧,快来收藏使用吧!

在之前的文章《实用Word技巧分享:隐藏图片,提升文档浏览和编辑效率!》中,我们学习了隐藏图片的技巧,可提升文档浏览和编辑效率。下面本篇文章再给大家分享一个实用Word技巧,看看怎么让页面自动滚动,快来收藏使用吧!

在之前的文章《实用Word技巧分享:怎么跨文档快速复制样式》中,我们了解了在文档间快速复制样式的方法。今天我们聊聊Word快捷键,聊聊【F4】键快速统一图片大小,快来看看吧!

第1部分:初始故障排除步骤检查苹果的系统状态:在深入研究复杂的解决方案之前,让我们从基础知识开始。问题可能不在于您的设备;苹果的服务器可能会关闭。访问Apple的系统状态页面,查看AppStore是否正常工作。如果有问题,您所能做的就是等待Apple修复它。检查您的互联网连接:确保您拥有稳定的互联网连接,因为“无法连接到AppStore”问题有时可归因于连接不良。尝试在Wi-Fi和移动数据之间切换或重置网络设置(“常规”>“重置”>“重置网络设置”>设置)。更新您的iOS版本:

在之前的文章《实用Word技巧分享:如何一键删除所有数字》中,我们学习了Word中一键删除所有数字的方法。而今天我们来聊聊Word表格中如何自动添加编号,简单却很实用!


Hot AI Tools

Undresser.AI Undress
AI-powered app for creating realistic nude photos

AI Clothes Remover
Online AI tool for removing clothes from photos.

Undress AI Tool
Undress images for free

Clothoff.io
AI clothes remover

AI Hentai Generator
Generate AI Hentai for free.

Hot Article

Hot Tools

SublimeText3 Linux new version
SublimeText3 Linux latest version

Notepad++7.3.1
Easy-to-use and free code editor

Atom editor mac version download
The most popular open source editor

WebStorm Mac version
Useful JavaScript development tools

ZendStudio 13.5.1 Mac
Powerful PHP integrated development environment
