Home > Article > Backend Development > Create Baidu Dictionary word search collector with PHP_PHP tutorial
This article shows how to build a Baidu Dictionary word-lookup collector (scraper) in PHP. Readers who need one can use it as a reference.
Baidu dict collection sample
This class was written to collect all of the result data that Baidu Dictionary returns for a looked-up word. The project also ships with a word list of roughly 135,000 words and a simple collection example. Below is the main class I wrote, dict.class.php. The project is hosted at http://github.com/widuu/baidu_dict; fork it if you need it. Few people are likely to need a tool like this, so anyone who finds it useful is welcome to take it.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 |
/** * dict.class.php collects Baidu dictionary translation content * * @copyright (C) 2014 widuu * @license http://www.widuu.com * @lastmodify 2014-2-15 */
header("content-type:text/html;charset=utf8"); class Dict{
/**
 * The word currently being looked up; assigned by content().
 *
 * @var string
 */
private $word;

/**
 * Maximum number of phrase entries kept by getPhrase().
 *
 * The declaration sits on its own line so that the preceding comment
 * cannot swallow it; getPhrase() reads it through self::$num.
 *
 * @var int
 */
private static $num = 10;

/**
 * Nothing to initialise at construction time; the word is passed to content().
 */
public function __construct(){}
/**
 * Look up a word and return every section collected from the Baidu dictionary page.
 *
 * @param string $word English word to look up
 * @return array Associative array with the keys:
 *               "symbol"  => phonetic symbols
 *               "pro"     => pronunciation
 *               "example" => example sentences
 *               "explain" => concise explanation
 *               "synonym" => synonyms and antonyms
 *               "phrase"  => phrase array
 */
public function content($word){
    $this->word = $word;

    // Array elements are evaluated top to bottom, so the section parsers
    // run in the same order as when each result was stored in a local first.
    return array(
        "symbol"  => $this->Pronounced(),  // phonetic symbols
        "pro"     => $this->getSay(),      // pronunciation
        "example" => $this->getExample(),  // example sentences
        "explain" => $this->getExplain(),  // concise explanation
        "synonym" => $this->getSynonym(),  // synonyms and antonyms
        "phrase"  => $this->getPhrase()    // phrase array
    );
}
/**
 * Download the Baidu dictionary result page for the current word with cURL.
 *
 * The body of the most recently fetched URL is remembered, so consecutive
 * calls for the same word reuse it instead of issuing another HTTP request.
 * Only one page is kept, which keeps memory use flat across long batch runs.
 *
 * $this->word must be the raw (not already URL-encoded) word; it is encoded here.
 *
 * @return string|false Response body, or false when the transfer failed
 */
private function getContent(){
    // Single-entry memo shared by all calls to this method.
    static $cachedUrl = null;
    static $cachedBody = null;

    // Encode the word so spaces, '&', '#' or non-ASCII characters cannot corrupt the query string.
    $url = "http://dict.baidu.com/s?wd=".urlencode($this->word);
    if ($cachedUrl === $url) {
        return $cachedBody;
    }

    $useragent = "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:23.0) Gecko/20100101 Firefox/23.0";
    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_USERAGENT, $useragent);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, TRUE);
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
    curl_setopt($ch, CURLOPT_HTTPGET, 1);
    curl_setopt($ch, CURLOPT_AUTOREFERER, 1);
    curl_setopt($ch, CURLOPT_HEADER, 0);
    curl_setopt($ch, CURLOPT_TIMEOUT, 30);
    $result = curl_exec($ch);

    // Query the handle that was actually used ($ch); the error is echoed as before.
    if (curl_errno($ch)) {
        echo 'Errno'.curl_error($ch);
    }
    curl_close($ch);

    // Remember successful downloads only, so a failed transfer is retried on the next call.
    if ($result !== false) {
        $cachedUrl = $url;
        $cachedBody = $result;
    }
    return $result;
}
/**
 * Extract the phonetic symbols from the fetched dictionary page.
 *
 * Fetches the page through getContent(), runs one regular expression over
 * it and returns the first two captures.
 *
 * NOTE(review): the pattern literal below looks damaged. Its inner double
 * quotes are unescaped and the text that should follow the capture group
 * appears to be missing. Restore it from the upstream project before use.
 * NOTE(review): both captures are read without checking that they exist.
 *
 * @return array array('en' => first capture, 'us' => second capture);
 *               presumably the UK and US phonetic symbols (confirm)
 */
private function Pronounced(){ $data = $this -> getContent(); preg_match_all("/"EN-US">(.*)/Ui",$data,$pronounced); return array( 'en' => $pronounced[1][0], 'us' => $pronounced[1][1] ); }
/**
 * Extract the pronunciation entries from the fetched dictionary page.
 *
 * Captures the value of every url="..." attribute found in the page; the
 * first capture is returned as 'en' and the second as 'us'. These are
 * presumably the UK and US audio clip addresses (confirm against the live page).
 *
 * NOTE(review): the pattern was reconstructed from a copy whose quote
 * escaping had been lost; confirm against the upstream project.
 *
 * @return array array('en' => string|null, 'us' => string|null); an entry
 *               is null when the page does not contain that capture
 */
private function getSay(){
    $page = $this->getContent();

    // Single-quoted pattern so the literal double quotes need no escaping.
    // The cast covers getContent() returning false on a failed transfer.
    preg_match_all('/url="(.*)"/Ui', (string) $page, $found);

    return array(
        'en' => isset($found[1][0]) ? $found[1][0] : null,
        'us' => isset($found[1][1]) ? $found[1][1] : null
    );
}
/**
 * Extract the example sentences from the fetched dictionary page.
 *
 * The page embeds them in a JavaScript assignment ("var example_data = ...").
 * That literal is split on "[[[" and then on "[[", the quoted strings that
 * open each innermost group are joined with spaces, and the joined strings
 * are returned in chunks of two.
 *
 * NOTE(review): the inner pattern was reconstructed from a copy whose
 * backslash escaping had been lost; confirm against the upstream project.
 *
 * @return array List of chunks holding up to two strings each (presumably a
 *               sentence and its translation; confirm). Empty array when
 *               the page contains no example data.
 */
private function getExample(){
    $data = $this->getContent();
    // The cast covers getContent() returning false on a failed transfer.
    preg_match_all("/var example_data = (.*)];/Us", (string) $data, $example);
    if (!isset($example[1][0])) {
        return array();
    }

    $str = "";
    // Normalise the leading brackets so the first group splits like the others.
    $data1 = "[[[".ltrim($example[1][0], "[");
    foreach (explode("[[[", $data1) as $value) {
        foreach (explode("[[", "[[".$value) as $v) {
            preg_match_all('/\["(.*)",/Us', "[".$v, $match);
            if (!empty($match[1])) {
                // Separator first: the (array, separator) order is rejected by PHP 8.
                $str .= implode(" ", $match[1])."@";
            }
        }
    }

    if ($str === "") {
        return array();
    }
    return array_chunk(explode("@", trim($str, "@")), 2);
}
/**
 * Extract the concise explanation section from the fetched dictionary page.
 *
 * Isolates the element with id "en-simple-means", then builds two maps:
 * "x" pairs each $a_data["adj"] entry with the $a_data["name"] entry at the
 * same index, and "b" pairs each $b_data["tag"] entry with the tag-stripped
 * $b_data["word"] entry at the same index.
 *
 * NOTE(review): this copy of the method is damaged. The first pattern has
 * unescaped double quotes and has lost the text after its capture group.
 * The two preg_match_all() calls that should fill $a_data and $b_data are
 * truncated after "(?P". Restore them from the upstream project before use.
 *
 * @return array array("x" => array, "b" => array); presumably "x" maps a
 *               part of speech to its meaning and "b" maps a word-form
 *               label to the word (confirm)
 */
private function getExplain(){ $data = $this -> getContent(); preg_match_all("/id="en-simple-means">(.*) /Us",$data,$explain);
$r_data = $explain[1][0]; preg_match_all("/ (?P preg_match_all("/(?P
$result = array(); foreach ($a_data["adj"] as $key => $value) { $result[$value] = $a_data["name"][$key]; }
$word_b = array(); foreach ($b_data["tag"] as $key => $value) { $word_b[$value] = strip_tags($b_data["word"][$key]); }
$result_data = array("x" => $result,"b" => $word_b);
return $result_data; }
/**
 * Extract the synonym / antonym section from the fetched dictionary page.
 *
 * Isolates the element with id "en-syn-ant", splits it into parts and
 * collects an "adj" list and a "content" list for each part. Every
 * non-empty content entry is then parsed into title/value pairs, and each
 * tag-stripped value is stored as $result[part index][adj][title].
 *
 * NOTE(review): this copy of the method is damaged. The first pattern has
 * unescaped double quotes, the explode() delimiter is an empty string, and
 * the two inner preg_match_all() calls are truncated after "(?P". The
 * preg_replace() pattern "<>" also looks incomplete. Restore all of these
 * from the upstream project before use.
 *
 * @return array Nested array keyed by part index, then adj, then title;
 *               presumably index 0 holds synonyms and index 1 antonyms (confirm)
 */
private function getSynonym(){ $data = $this -> getContent(); preg_match_all("/id="en-syn-ant">(.*) /Us",$data,$synonym);
$content = $synonym[1][0]; $data1 = explode("", $content); $result = array(); $data2 = array(); foreach ($data1 as $key => $value) { preg_match_all("/(?P
$data2[$key]["adj"] = $r_data["adj"]; $data2[$key]["content"] = $r_data["content"]; }
foreach ($data2 as $key => $value) { foreach ($value["content"] as $k => $v) { if(!empty($v)){ preg_match_all("/ (?P foreach ($v_data['title'] as $m => $d) { $data = strip_tags(preg_replace("<>"," ", $v_data["value"][$m])); $result[$key][$value["adj"][$k]][$d] = $data; } } } } return $result; }
/**
 * Extract the phrase section from the fetched dictionary page.
 *
 * Isolates the element with id "en-phrase", splits it into entries and
 * keeps at most self::$num of them. Each entry is split again into parts.
 * With three parts or fewer, the tag-stripped first part (spaces removed)
 * becomes the key and the tag-stripped second part the value. With more
 * parts, the remaining parts are chunked in pairs, tag-stripped, and the
 * key's value is replaced by array(previous value, chunk) for each chunk.
 *
 * NOTE(review): this copy of the method is damaged. The pattern has
 * unescaped double quotes and has lost the text after its capture group.
 * The first explode() delimiter is an empty string, and the second
 * delimiter (a single space) has probably lost markup as well. Restore
 * them from the upstream project before use.
 * NOTE(review): the innermost foreach reuses $key and $value from the
 * enclosing loop.
 *
 * @return array Map of phrase => meaning; the value is a nested array when
 *               an entry carries more than one meaning
 */
private function getPhrase(){ $num = self::$num; $data = $this -> getContent(); preg_match_all("/id="en-phrase">(.*) /Us",$data,$phrase);
$data = explode("",$phrase[1][0]); $data1 = array_slice($data,0,$num); $result = array(); foreach ($data1 as $key => $value) { $data2 = explode(" ", $value);$n = count($data2); if($n<=3){ $result[str_replace(" ","",strip_tags($data2[0]))] = strip_tags($data2[1]); }else{ $data3 = array_slice($data2,0,$n-1); $data4 = array_slice($data2,0,2); $res = array_diff($data3,$data4); $data5 = array_chunk($res,2); $key_value = trim(str_replace(" ","",strip_tags($data4[0]))); $result[$key_value] = strip_tags($data4[1]); foreach ($data5 as $key => $value) { foreach ($value as $k => $v) { $value[$k] = strip_tags($v); } $array = array($result[$key_value],$value); if (array_key_exists($key_value, $result)){ $result[$key_value] = $array; } }
} } return $result; }
/**
 * Turn a value into an escaped, var_export()-formatted string.
 *
 * @param array $data       Value to convert
 * @param bool  $isformdata Optional, default 1. When truthy the value is first
 *                          passed through new_stripslashes(); pass 0 to skip that step
 * @return string The escaped representation, or '' when $data loosely equals ''
 */
private function array2string($data, $isformdata = 1) {
    if ($data == '') {
        return '';
    }

    $source = $isformdata ? $this->new_stripslashes($data) : $data;
    $exported = var_export($source, TRUE);

    return addslashes($exported);
}
/**
 * Apply stripslashes() to a string, or recursively to every element of an array.
 *
 * @param mixed $string String or (possibly nested) array to process
 * @return mixed The processed string, or the array with its keys preserved
 */
private function new_stripslashes($string) {
    if (is_array($string)) {
        // Walk the keys so each element is replaced in place.
        foreach (array_keys($string) as $idx) {
            $string[$idx] = $this->new_stripslashes($string[$idx]);
        }
        return $string;
    }

    return stripslashes($string);
}
}
// Usage example:
// $dict = new Dict();
// $result = $dict->content("express");
The above is the entire content of this article. It is a very practical function. I hope you will like it.