Heim  >  Artikel  >  Backend-Entwicklung  >  这种防爬虫机制怎么过?

这种防爬虫机制怎么过?

WBOY
WBOY Original
2016-06-06 20:10:52 1956 Durchsuche

这个网站 freeget.co/ 带了防爬机制,但不知道用的是哪种防爬机制,这么厉害。

不带header头返回的是

<code>

<meta name="robots" content="noindex,nofollow">
<script>
(function() {  function getSessionCookies() {   cookieArray = new Array();   var cName = /^\s?incap_ses_/;   var c = document.cookie.split(";");   for (var i = 0; i < c.length; i++) {    key = c[i].substr(0, c[i].indexOf("="));    value = c[i].substr(c[i].indexOf("=") + 1, c[i].length);    if (cName.test(key)) {     cookieArray[cookieArray.length] = value    }   }   return cookieArray  }  function setIncapCookie(vArray) {   try {    cookies = getSessionCookies();    digests = new Array(cookies.length);    for (var i = 0; i < cookies.length; i++) {     digests[i] = simpleDigest((vArray) + cookies[i])    }    res = vArray + ",digest=" + (digests.join())   } catch (e) {    res = vArray + ",digest=" + (encodeURIComponent(e.toString()))   }   createCookie("___utmvc", res, 20)  }  function simpleDigest(mystr) {   var res = 0;   for (var i = 0; i < mystr.length; i++) {    res += mystr.charCodeAt(i)   }   return res  }  function createCookie(name, value, seconds) {   if (seconds) {    var date = new Date();    date.setTime(date.getTime() + (seconds * 1000));    var expires = "; expires=" + date.toGMTString()   } else {    var expires = ""   }   document.cookie = name + "=" + value + expires + "; path=/"  }  function test(o) {   var res = "";   var vArray = new Array();   for (var j = 0; j < o.length; j++) {    var test = o[j][0]    switch (o[j][1]) {    case "exists":     try {      if(typeof(eval(test)) != "undefined"){         vArray[vArray.length] = encodeURIComponent(test + "=true")      }      else{         vArray[vArray.length] = encodeURIComponent(test + "=false")      }     } catch (e) {      vArray[vArray.length] = encodeURIComponent(test + "=false")     }     break;    case "value":     try {      vArray[vArray.length] = encodeURIComponent(test + "=" + eval(test).toString())     } catch (e) {      vArray[vArray.length] = encodeURIComponent(test + "=" + e)     }     break;     case "value_or_nil":         try{             if(typeof(eval(test)) != "undefined"){     
            vArray[vArray.length] = encodeURIComponent(test + "=" + eval(test).toString())             }             else{                 vArray[vArray.length] = encodeURIComponent(test + "=nil")             }                     }         catch(e){             vArray[vArray.length] = encodeURIComponent(test + ":" + test_type + "=" + e)         }         break;     case "plugin_extentions":         try {             extentions = []             try {                 i = extentions.indexOf("i")             } catch (e) {                 vArray[vArray.length] = encodeURIComponent("plugin_ext=indexOf is not a function")                 break;             }                         for (var i=0;i<navigator.plugins.length;i++) {                 if (typeof(navigator.plugins[i]) == "undefined") {                     vArray[vArray.length] = encodeURIComponent("plugin_ext=plugins[i] is undefined");                     break;                 }                                 filename = navigator.plugins[i].filename                 if (typeof(filename) == "undefined") {                     vArray[vArray.length] = encodeURIComponent("plugin_ext=filename is undefined");                     break;                 }                 if (filename.split(".").length == 2) {                     ext = filename.split(".")[1]                     if (extentions.indexOf(ext) < 0) {                         extentions.push(ext)                     }                                 }                 }                          for    (i = 0; i < extentions.length; i++) {                 vArray[vArray.length] = encodeURIComponent("plugin_ext=" + extentions[i]);             }         } catch (e) {          vArray[vArray.length] = encodeURIComponent("plugin_ext=" + e)         }         break;     case "plugins":     try{         p=navigator.plugins         pres=""         for (a in p){pres+=(p[a]['description']+" ").substring(0,20)}         vArray[vArray.length] = encodeURIComponent("plugins=" + 
pres);         }     catch(e){         vArray[vArray.length] = encodeURIComponent("plugins=" +e);         }     break;      case "plugin":     try {      a = navigator.plugins;      for (i in a) {       f = a[i]["filename"].split(".");       if (f.length == 2) {        vArray[vArray.length] = encodeURIComponent("plugin=" + f[1]);        break       }      }     } catch (e) {      vArray[vArray.length] = encodeURIComponent("plugin=" + e)     }     break    }   }   vArray = vArray.join();   return vArray  }  var o = [   ["navigator", "exists"],   ["navigator.vendor", "value"],   ["navigator.vendor", "value_or_nil"],   ["opera", "exists"],   ["ActiveXObject", "exists"],   ["navigator.appName", "value"],   ["navigator.appName", "value_or_nil"],   ["platform", "plugin"],   ["platform", "plugin_extentions"],   ["webkitURL", "exists"],   ["navigator.plugins.length==0", "value"],   ["navigator.plugins.length==0", "value_or_nil"],   ["_phantom", "exists"] ];  try {   setIncapCookie(test(o));   document.createElement("img").src = "/_Incapsula_Resource?SWKMTFSR=1&e=" + Math.random()  } catch (e) {   img = document.createElement("img");   img.src = "/_Incapsula_Resource?SWKMTFSR=1&e=" + e  } })();
</script>
<script>
(function() { 
var z="";var b="7472797B766172207868723B76617220743D6E6577204461746528292E67657454696D6528293B766172207374617475733D227374617274223B7661722074696D696E673D6E65772041727261792833293B77696E646F772E6F6E756E6C6F61643D66756E6374696F6E28297B74696D696E675B325D3D22723A222B286E6577204461746528292E67657454696D6528292D74293B646F63756D656E742E637265617465456C656D656E742822696D6722292E7372633D222F5F496E63617073756C615F5265736F757263653F4553324C555243543D363726743D373826643D222B656E636F6465555249436F6D706F6E656E74287374617475732B222028222B74696D696E672E6A6F696E28292B222922297D3B69662877696E646F772E584D4C4874747052657175657374297B7868723D6E657720584D4C48747470526571756573747D656C73657B7868723D6E657720416374697665584F626A65637428224D6963726F736F66742E584D4C4854545022297D7868722E6F6E726561647973746174656368616E67653D66756E6374696F6E28297B737769746368287868722E72656164795374617465297B6361736520303A7374617475733D6E6577204461746528292E67657454696D6528292D742B223A2072657175657374206E6F7420696E697469616C697A656420223B627265616B3B6361736520313A7374617475733D6E6577204461746528292E67657454696D6528292D742B223A2073657276657220636F6E6E656374696F6E2065737461626C6973686564223B627265616B3B6361736520323A7374617475733D6E6577204461746528292E67657454696D6528292D742B223A2072657175657374207265636569766564223B627265616B3B6361736520333A7374617475733D6E6577204461746528292E67657454696D6528292D742B223A2070726F63657373696E672072657175657374223B627265616B3B6361736520343A7374617475733D22636F6D706C657465223B74696D696E675B315D3D22633A222B286E6577204461746528292E67657454696D6528292D74293B6966287868722E7374617475733D3D323030297B706172656E742E6C6F636174696F6E2E72656C6F616428297D627265616B7D7D3B74696D696E675B305D3D22733A222B286E6577204461746528292E67657454696D6528292D74293B7868722E6F70656E2822474554222C222F5F496E63617073756C615F5265736F757263653F535748414E45444C3D343633303637323631383038353637323936362C363535313031303239383433303638333038302C31353637393036393730303739363436333136352C333439303537222C66616C7365293B7868
722E73656E64286E756C6C297D63617463682863297B7374617475732B3D6E6577204461746528292E67657454696D6528292D742B2220696E6361705F6578633A20222B633B646F63756D656E742E637265617465456C656D656E742822696D6722292E7372633D222F5F496E63617073756C615F5265736F757263653F4553324C555243543D363726743D373826643D222B656E636F6465555249436F6D706F6E656E74287374617475732B222028222B74696D696E672E6A6F696E28292B222922297D3B";for (var i=0;i<b.length;i+=2){z=z+parseInt(b.substring(i, i+2), 16)+",";}z = z.substring(0,z.length-1); eval(eval('String.fromCharCode('+z+')'));})();
</script>

<iframe style="display:none;visibility:hidden;" src="//content.incapsula.com/jsTest.html" id="gaIframe"></iframe>
</code>

还有下面的是带了header头时返回的

<code>
    
    
        <meta name="ROBOTS" content="NOINDEX, NOFOLLOW">
        <meta name="format-detection" content="telephone=no">
        <meta name="viewport" content="initial-scale=1.0">
        <meta http-equiv="X-UA-Compatible" content="IE=edge,chrome=1">
    
    
    
        <iframe src="/_Incapsula_Resource?CWUDNSAI=9&xinfo=6-56800294-0%200NNN%20RT(1460895856602%200)%20q(0%20-1%20-1%20-1)%20r(0%20-1)%20B12(4,315,0)%20U10000&incident_id=406000510241030862-742862014603723574&edet=12&cinfo=04000000" frameborder="0" width="100%" height="100%" marginheight="0px" marginwidth="0px">
            Request unsuccessful. Incapsula incident ID: 406000510241030862-742862014603723574
        </iframe>
    

</code>

回复内容:

这个网站 freeget.co/ 带了防爬机制,但不知道用的是哪种防爬机制,这么厉害。

不带header头返回的是

<code>

<meta name="robots" content="noindex,nofollow">
<script>
(function() {  function getSessionCookies() {   cookieArray = new Array();   var cName = /^\s?incap_ses_/;   var c = document.cookie.split(";");   for (var i = 0; i < c.length; i++) {    key = c[i].substr(0, c[i].indexOf("="));    value = c[i].substr(c[i].indexOf("=") + 1, c[i].length);    if (cName.test(key)) {     cookieArray[cookieArray.length] = value    }   }   return cookieArray  }  function setIncapCookie(vArray) {   try {    cookies = getSessionCookies();    digests = new Array(cookies.length);    for (var i = 0; i < cookies.length; i++) {     digests[i] = simpleDigest((vArray) + cookies[i])    }    res = vArray + ",digest=" + (digests.join())   } catch (e) {    res = vArray + ",digest=" + (encodeURIComponent(e.toString()))   }   createCookie("___utmvc", res, 20)  }  function simpleDigest(mystr) {   var res = 0;   for (var i = 0; i < mystr.length; i++) {    res += mystr.charCodeAt(i)   }   return res  }  function createCookie(name, value, seconds) {   if (seconds) {    var date = new Date();    date.setTime(date.getTime() + (seconds * 1000));    var expires = "; expires=" + date.toGMTString()   } else {    var expires = ""   }   document.cookie = name + "=" + value + expires + "; path=/"  }  function test(o) {   var res = "";   var vArray = new Array();   for (var j = 0; j < o.length; j++) {    var test = o[j][0]    switch (o[j][1]) {    case "exists":     try {      if(typeof(eval(test)) != "undefined"){         vArray[vArray.length] = encodeURIComponent(test + "=true")      }      else{         vArray[vArray.length] = encodeURIComponent(test + "=false")      }     } catch (e) {      vArray[vArray.length] = encodeURIComponent(test + "=false")     }     break;    case "value":     try {      vArray[vArray.length] = encodeURIComponent(test + "=" + eval(test).toString())     } catch (e) {      vArray[vArray.length] = encodeURIComponent(test + "=" + e)     }     break;     case "value_or_nil":         try{             if(typeof(eval(test)) != "undefined"){     
            vArray[vArray.length] = encodeURIComponent(test + "=" + eval(test).toString())             }             else{                 vArray[vArray.length] = encodeURIComponent(test + "=nil")             }                     }         catch(e){             vArray[vArray.length] = encodeURIComponent(test + ":" + test_type + "=" + e)         }         break;     case "plugin_extentions":         try {             extentions = []             try {                 i = extentions.indexOf("i")             } catch (e) {                 vArray[vArray.length] = encodeURIComponent("plugin_ext=indexOf is not a function")                 break;             }                         for (var i=0;i<navigator.plugins.length;i++) {                 if (typeof(navigator.plugins[i]) == "undefined") {                     vArray[vArray.length] = encodeURIComponent("plugin_ext=plugins[i] is undefined");                     break;                 }                                 filename = navigator.plugins[i].filename                 if (typeof(filename) == "undefined") {                     vArray[vArray.length] = encodeURIComponent("plugin_ext=filename is undefined");                     break;                 }                 if (filename.split(".").length == 2) {                     ext = filename.split(".")[1]                     if (extentions.indexOf(ext) < 0) {                         extentions.push(ext)                     }                                 }                 }                          for    (i = 0; i < extentions.length; i++) {                 vArray[vArray.length] = encodeURIComponent("plugin_ext=" + extentions[i]);             }         } catch (e) {          vArray[vArray.length] = encodeURIComponent("plugin_ext=" + e)         }         break;     case "plugins":     try{         p=navigator.plugins         pres=""         for (a in p){pres+=(p[a]['description']+" ").substring(0,20)}         vArray[vArray.length] = encodeURIComponent("plugins=" + 
pres);         }     catch(e){         vArray[vArray.length] = encodeURIComponent("plugins=" +e);         }     break;      case "plugin":     try {      a = navigator.plugins;      for (i in a) {       f = a[i]["filename"].split(".");       if (f.length == 2) {        vArray[vArray.length] = encodeURIComponent("plugin=" + f[1]);        break       }      }     } catch (e) {      vArray[vArray.length] = encodeURIComponent("plugin=" + e)     }     break    }   }   vArray = vArray.join();   return vArray  }  var o = [   ["navigator", "exists"],   ["navigator.vendor", "value"],   ["navigator.vendor", "value_or_nil"],   ["opera", "exists"],   ["ActiveXObject", "exists"],   ["navigator.appName", "value"],   ["navigator.appName", "value_or_nil"],   ["platform", "plugin"],   ["platform", "plugin_extentions"],   ["webkitURL", "exists"],   ["navigator.plugins.length==0", "value"],   ["navigator.plugins.length==0", "value_or_nil"],   ["_phantom", "exists"] ];  try {   setIncapCookie(test(o));   document.createElement("img").src = "/_Incapsula_Resource?SWKMTFSR=1&e=" + Math.random()  } catch (e) {   img = document.createElement("img");   img.src = "/_Incapsula_Resource?SWKMTFSR=1&e=" + e  } })();
</script>
<script>
(function() { 
var z="";var b="7472797B766172207868723B76617220743D6E6577204461746528292E67657454696D6528293B766172207374617475733D227374617274223B7661722074696D696E673D6E65772041727261792833293B77696E646F772E6F6E756E6C6F61643D66756E6374696F6E28297B74696D696E675B325D3D22723A222B286E6577204461746528292E67657454696D6528292D74293B646F63756D656E742E637265617465456C656D656E742822696D6722292E7372633D222F5F496E63617073756C615F5265736F757263653F4553324C555243543D363726743D373826643D222B656E636F6465555249436F6D706F6E656E74287374617475732B222028222B74696D696E672E6A6F696E28292B222922297D3B69662877696E646F772E584D4C4874747052657175657374297B7868723D6E657720584D4C48747470526571756573747D656C73657B7868723D6E657720416374697665584F626A65637428224D6963726F736F66742E584D4C4854545022297D7868722E6F6E726561647973746174656368616E67653D66756E6374696F6E28297B737769746368287868722E72656164795374617465297B6361736520303A7374617475733D6E6577204461746528292E67657454696D6528292D742B223A2072657175657374206E6F7420696E697469616C697A656420223B627265616B3B6361736520313A7374617475733D6E6577204461746528292E67657454696D6528292D742B223A2073657276657220636F6E6E656374696F6E2065737461626C6973686564223B627265616B3B6361736520323A7374617475733D6E6577204461746528292E67657454696D6528292D742B223A2072657175657374207265636569766564223B627265616B3B6361736520333A7374617475733D6E6577204461746528292E67657454696D6528292D742B223A2070726F63657373696E672072657175657374223B627265616B3B6361736520343A7374617475733D22636F6D706C657465223B74696D696E675B315D3D22633A222B286E6577204461746528292E67657454696D6528292D74293B6966287868722E7374617475733D3D323030297B706172656E742E6C6F636174696F6E2E72656C6F616428297D627265616B7D7D3B74696D696E675B305D3D22733A222B286E6577204461746528292E67657454696D6528292D74293B7868722E6F70656E2822474554222C222F5F496E63617073756C615F5265736F757263653F535748414E45444C3D343633303637323631383038353637323936362C363535313031303239383433303638333038302C31353637393036393730303739363436333136352C333439303537222C66616C7365293B7868
722E73656E64286E756C6C297D63617463682863297B7374617475732B3D6E6577204461746528292E67657454696D6528292D742B2220696E6361705F6578633A20222B633B646F63756D656E742E637265617465456C656D656E742822696D6722292E7372633D222F5F496E63617073756C615F5265736F757263653F4553324C555243543D363726743D373826643D222B656E636F6465555249436F6D706F6E656E74287374617475732B222028222B74696D696E672E6A6F696E28292B222922297D3B";for (var i=0;i<b.length;i+=2){z=z+parseInt(b.substring(i, i+2), 16)+",";}z = z.substring(0,z.length-1); eval(eval('String.fromCharCode('+z+')'));})();
</script>

<iframe style="display:none;visibility:hidden;" src="//content.incapsula.com/jsTest.html" id="gaIframe"></iframe>
</code>

还有下面的是带了header头时返回的

<code>
    
    
        <meta name="ROBOTS" content="NOINDEX, NOFOLLOW">
        <meta name="format-detection" content="telephone=no">
        <meta name="viewport" content="initial-scale=1.0">
        <meta http-equiv="X-UA-Compatible" content="IE=edge,chrome=1">
    
    
    
        <iframe src="/_Incapsula_Resource?CWUDNSAI=9&xinfo=6-56800294-0%200NNN%20RT(1460895856602%200)%20q(0%20-1%20-1%20-1)%20r(0%20-1)%20B12(4,315,0)%20U10000&incident_id=406000510241030862-742862014603723574&edet=12&cinfo=04000000" frameborder="0" width="100%" height="100%" marginheight="0px" marginwidth="0px">
            Request unsuccessful. Incapsula incident ID: 406000510241030862-742862014603723574
        </iframe>
    

</code>

如果你搜搜Incapsula,你就会知道答案。

和CloudFlare一样,对访客进行分析,判断好坏。

如果你想知道怎么过,那么最简单的方法就是模拟一次真实访问。

保持一样的IP、一样的UA,自己先访问一次,获取到所有COOKIE,然后把这些COOKIE用在爬虫上。

Stellungnahme:
Der Inhalt dieses Artikels wird freiwillig von Internetnutzern beigesteuert und das Urheberrecht liegt beim ursprünglichen Autor. Diese Website übernimmt keine entsprechende rechtliche Verantwortung. Wenn Sie Inhalte finden, bei denen der Verdacht eines Plagiats oder einer Rechtsverletzung besteht, wenden Sie sich bitte an admin@php.cn