Home > Article > Backend Development > Create Baidu Dictionary word search collector with PHP_PHP tutorial
This article shows how to build a Baidu Dictionary word-lookup collector (scraper) in PHP. Readers who need one can use it as a reference.
Baidu dict collection sample
This was written to collect all of the result data that Baidu Dictionary returns for a looked-up word. The project also ships with a word list of about 135,000 entries (13.5w) and a simple collection example. Below is the main class I wrote, dict.class.php. The project address is http://github.com/widuu/baidu_dict; if you need it, just fork it. Very few people will need something like this, so anyone who finds it useful is welcome to take it.
|
/** * dict.class.php collects Baidu dictionary translation content * * @copyright (C) 2014 widuu * @license http://www.widuu.com * @lastmodify 2014-2-15 */
header("content-type:text/html;charset=utf8"); class Dict{
// The word currently being looked up; set by content() and read by getContent().
private $word;

// Number of entries to display (getPhrase() slices its result list to this length).
// The declaration must sit on its own line: when it shares a line with the
// "//" comment it is commented out and self::$num is undefined.
private static $num = 10;

// Nothing to initialise; the word is supplied per call to content().
public function __construct(){}
/**
 * Public entry point: look up one English word and return everything the
 * private collectors gather for it.
 *
 * The word is stored on the instance, then each collector is invoked in a
 * fixed order (Pronounced, getSay, getExample, getExplain, getSynonym,
 * getPhrase); each of them calls getContent() on its own.
 *
 * @param string $word English word to look up
 * @return array array(
 *     "symbol"  => phonetic symbols,
 *     "pro"     => pronunciation,
 *     "example" => example sentences,
 *     "explain" => concise explanation,
 *     "synonym" => synonyms and antonyms,
 *     "phrase"  => phrase array
 * )
 */
public function content($word)
{
    $this->word = $word;

    // Array elements are evaluated top to bottom, so the collectors run in
    // the order listed here.
    return array(
        "symbol"  => $this->Pronounced(),   // phonetic symbols
        "pro"     => $this->getSay(),       // pronunciation
        "example" => $this->getExample(),   // example sentences
        "explain" => $this->getExplain(),   // concise explanation
        "synonym" => $this->getSynonym(),   // synonyms and antonyms
        "phrase"  => $this->getPhrase(),    // phrase array
    );
}
/**
 * Fetch the Baidu Dictionary result page for the current word with cURL.
 *
 * Issues an HTTP GET to http://dict.baidu.com/s?wd=<word> with a desktop
 * Firefox user agent, follows redirects, and waits at most 30 seconds.
 * If cURL reports an error, the error text is echoed (prefixed with "Errno")
 * and the value returned by curl_exec() is still handed back to the caller.
 *
 * @return string|bool Response body, or false when the transfer failed
 */
private function getContent()
{
    $useragent = "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:23.0) Gecko/20100101 Firefox/23.0";

    // Encode the word so that spaces and reserved characters (e.g. a phrase
    // such as "take off" or a stray "&") cannot corrupt the query string.
    $url = "http://dict.baidu.com/s?wd=" . urlencode((string) $this->word);

    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_USERAGENT, $useragent);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, TRUE);   // return the body instead of printing it
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
    curl_setopt($ch, CURLOPT_HTTPGET, 1);
    curl_setopt($ch, CURLOPT_AUTOREFERER, 1);
    curl_setopt($ch, CURLOPT_HEADER, 0);              // body only, no response headers
    curl_setopt($ch, CURLOPT_TIMEOUT, 30);

    $result = curl_exec($ch);

    // The error check must use the handle created above ($ch); the previous
    // code queried an undefined $curl variable, so failures were never reported.
    if (curl_errno($ch)) {
        echo 'Errno' . curl_error($ch);
    }

    curl_close($ch);

    return $result;
}
/**
 * Get the phonetic symbols for the current word.
 *
 * Downloads the result page through getContent(), collects every capture of
 * the pattern below, and returns the first two captures.
 *
 * NOTE(review): the pattern literal contains unescaped double quotes and has
 * no closing tag after "(.*)". It looks like the escaping and the HTML tag
 * were lost when this listing was extracted - restore it from the upstream
 * dict.class.php before running.
 * NOTE(review): indexes [1][0] and [1][1] are read without checking that two
 * matches exist.
 *
 * @return array array('en' => first capture (UK), 'us' => second capture (US))
 */
private function Pronounced(){ $data = $this -> getContent(); preg_match_all("/"EN-US">(.*)/Ui",$data,$pronounced); return array( 'en' => $pronounced[1][0], 'us' => $pronounced[1][1] ); }
/**
 * Get the pronunciation entries for the current word.
 *
 * Downloads the result page through getContent(), captures the value of
 * every url="..." occurrence, and returns the first two captures.
 * Presumably these are the audio URLs of the UK and US pronunciations -
 * TODO confirm against the live page markup.
 *
 * NOTE(review): the double quotes inside the pattern literal are unescaped
 * in this listing (escaping apparently lost in extraction); restore the
 * backslashes before running.
 * NOTE(review): indexes [1][0] and [1][1] are read without checking that two
 * matches exist.
 *
 * @return array array('en' => first capture (UK), 'us' => second capture (US))
 */
private function getSay(){ $data = $this -> getContent(); preg_match_all("/url="(.*)"/Ui",$data,$pronounced); return array( 'en' => $pronounced[1][0], 'us' => $pronounced[1][1] ); }
/**
 * Get the example sentences for the current word.
 *
 * Steps, as written:
 *  1. Download the result page and capture the text between
 *     "var example_data = " and the first following "];".
 *  2. Split that text on "[[[" and then on "[[".
 *  3. From each fragment collect the quoted tokens, join them with a space,
 *     and append the joined text to one string using "@" as a separator.
 *  4. Split the string on "@" again and group the pieces in pairs.
 *
 * Presumably each pair is (example sentence, its translation) - TODO confirm
 * against the page's example_data structure.
 *
 * NOTE(review): the inner pattern literal has unescaped quotes and an
 * unescaped "[" (escaping apparently lost in extraction); restore it before
 * running.
 * NOTE(review): $num is computed but never used.
 * NOTE(review): implode() is called with the array first and the separator
 * second; PHP 8 no longer accepts that argument order.
 *
 * @return array List of two-element arrays produced by array_chunk()
 */
private function getExample(){ $str = ""; $data = $this -> getContent(); preg_match_all("/var example_data = (.*)];/Us",$data,$example); $data1 = "[[[".ltrim($example[1][0],"["); $data2 = explode("[[[",$data1); $num = count(array_filter($data2)); foreach($data2 as $key => $value){ $data3 = explode("[[","[[".$value); foreach ($data3 as $k => $v) { preg_match_all("/["(.*)",/Us","[".$v, $match); if(!empty($match[1])){ $str .= implode($match[1]," ")."@"; } } } $data4 = trim($str,"@"); $data5 = explode("@", $data4); $result = array_chunk($data5, 2); return $result; }
/**
 * Get the concise explanation for the current word.
 *
 * Downloads the result page, isolates the element with id "en-simple-means",
 * and builds two maps from it:
 *  - "x": $a_data["adj"][i] => $a_data["name"][i]
 *         (the original comment describes this as the part-of-speech map)
 *  - "b": $b_data["tag"][i] => strip_tags($b_data["word"][i])
 *         (the original comment calls this "Attachment"; presumably the
 *         inflected word forms - TODO confirm)
 *
 * NOTE(review): the two preg_match_all() calls that should fill $a_data and
 * $b_data are truncated in this listing (the named-group patterns were cut
 * off in extraction), and the first pattern has unescaped quotes. The method
 * cannot run as shown; restore those statements from the upstream source.
 *
 * @return array array("x" => array, "b" => array)
 */
private function getExplain(){ $data = $this -> getContent(); preg_match_all("/id="en-simple-means">(.*) /Us",$data,$explain);
$r_data = $explain[1][0]; preg_match_all("/ (?P preg_match_all("/(?P
$result = array(); foreach ($a_data["adj"] as $key => $value) { $result[$value] = $a_data["name"][$key]; }
$word_b = array(); foreach ($b_data["tag"] as $key => $value) { $word_b[$value] = strip_tags($b_data["word"][$key]); }
$result_data = array("x" => $result,"b" => $word_b);
return $result_data; }
/**
 * Get the synonyms and antonyms for the current word.
 *
 * Downloads the result page, isolates the element with id "en-syn-ant",
 * splits it into sections, and for each section records the "adj" and
 * "content" capture groups. Every non-empty content entry is then matched
 * again for "title"/"value" groups, and the tag-stripped value is stored as
 * $result[section index][adj][title].
 *
 * The original comment describes the result as
 * array(0 => synonyms, 1 => antonyms), usually multi-dimensional.
 *
 * NOTE(review): the explode() delimiter is an empty string and both
 * named-group patterns are cut off in this listing (HTML tags and pattern
 * text apparently lost in extraction); the pattern passed to preg_replace()
 * also looks incomplete. The method cannot run as shown; restore these
 * literals from the upstream source.
 *
 * @return array Nested array keyed by section index, adj and title
 */
private function getSynonym(){ $data = $this -> getContent(); preg_match_all("/id="en-syn-ant">(.*) /Us",$data,$synonym);
$content = $synonym[1][0]; $data1 = explode("", $content); $result = array(); $data2 = array(); foreach ($data1 as $key => $value) { preg_match_all("/(?P
$data2[$key]["adj"] = $r_data["adj"]; $data2[$key]["content"] = $r_data["content"]; }
foreach ($data2 as $key => $value) { foreach ($value["content"] as $k => $v) { if(!empty($v)){ preg_match_all("/ (?P foreach ($v_data['title'] as $m => $d) { $data = strip_tags(preg_replace("<>"," ", $v_data["value"][$m])); $result[$key][$value["adj"][$k]][$d] = $data; } } } } return $result; }
/**
 * Get the phrases for the current word.
 *
 * Downloads the result page, isolates the element with id "en-phrase",
 * splits it into entries, and keeps at most self::$num of them.
 * Each entry is split into parts:
 *  - three parts or fewer: the tag-stripped first part (spaces removed)
 *    becomes the key and the tag-stripped second part the value;
 *  - more parts: the first two parts form the key/value as above, and the
 *    remaining parts (minus the last one) are grouped in pairs; each pair
 *    replaces the stored value with array(previous value, pair).
 *
 * NOTE(review): the first explode() delimiter is an empty string and the
 * "en-phrase" pattern has unescaped quotes (literals apparently lost in
 * extraction); the " " delimiters and str_replace() search strings may also
 * have been altered. Restore them from the upstream source before running.
 * NOTE(review): the inner foreach reuses $key and $value, shadowing the
 * outer loop variables.
 *
 * @return array Map of phrase => meaning; values may be nested arrays
 */
private function getPhrase(){ $num = self::$num; $data = $this -> getContent(); preg_match_all("/id="en-phrase">(.*) /Us",$data,$phrase);
$data = explode("",$phrase[1][0]); $data1 = array_slice($data,0,$num); $result = array(); foreach ($data1 as $key => $value) { $data2 = explode(" ", $value);$n = count($data2); if($n<=3){ $result[str_replace(" ","",strip_tags($data2[0]))] = strip_tags($data2[1]); }else{ $data3 = array_slice($data2,0,$n-1); $data4 = array_slice($data2,0,2); $res = array_diff($data3,$data4); $data5 = array_chunk($res,2); $key_value = trim(str_replace(" ","",strip_tags($data4[0]))); $result[$key_value] = strip_tags($data4[1]); foreach ($data5 as $key => $value) { foreach ($value as $k => $v) { $value[$k] = strip_tags($v); } $array = array($result[$key_value],$value); if (array_key_exists($key_value, $result)){ $result[$key_value] = $array; } }
} } return $result; }
/**
 * Serialise a value to an escaped PHP-literal string.
 *
 * The value is rendered with var_export() and the rendered text is then
 * passed through addslashes().
 *
 * @param array $data       Value to convert
 * @param bool  $isformdata When truthy (default 1) the value is first run
 *                          through new_stripslashes(); pass 0 to skip that step
 * @return string The escaped literal, or '' when $data loosely equals ''
 */
private function array2string($data, $isformdata = 1)
{
    // Loose comparison is kept on purpose so existing callers see the same
    // "treated as empty" behaviour.
    if ($data == '') {
        return '';
    }

    $payload = $isformdata ? $this->new_stripslashes($data) : $data;
    $exported = var_export($payload, TRUE);

    return addslashes($exported);
}
/**
 * Apply stripslashes() to a string, or recursively to every element of an
 * array (keys are kept as they are).
 *
 * @param mixed $string String or (nested) array to process
 * @return mixed The un-slashed string, or an array of the same keys with
 *               every leaf un-slashed
 */
private function new_stripslashes($string)
{
    if (is_array($string)) {
        $cleaned = array();
        foreach ($string as $index => $item) {
            // Recurse so nested arrays are handled at any depth.
            $cleaned[$index] = $this->new_stripslashes($item);
        }
        return $cleaned;
    }

    return stripslashes($string);
}
}
// $dict = new Dict(); // $result = $dict->content("express"); |
The above is the entire content of this article. It is a very practical function. I hope you will like it.