脚本藏身之处不过有四: 1、script标签、link标签、style标签、iframe标签 2、on开头的标签属性 3、javascript(vbscript)伪协议 4、css的expression
下面是它们的字符串规则:
1、<(script|link|style|iframe)(.|\n)*\/\1>\s*
2、\s*on[a-z]+\s*=\s*("[^"]+"|'[^']+'|[^\s]+)\s*(?=>)
3、\s*(href|src)\s*=\s*("\s*(javascript|vbscript):[^"]+"|'\s*(javascript|vbscript):[^']+'|(javascript|vbscript):[^\s]+)\s*(?=>)
4、expression\((.|\n)*\);?
了解它们的规则后,抓虫行动就水到渠成。
<textarea id="bug1" cols="80" rows="16"> <button id="kick">抓虫1</button> <script> function kickBug(str) { return str.replace(/<(script|link|style|iframe)(.|\n)*\/\1>\s*/ig,""); } </script> <iframe></iframe> <link href='test.css'></link> <style> a { height:expression(alert('hei')); } </style> </textarea>
<button id="kick1">抓虫1</button>
<script>
/**
 * Demo 1: strip script, style, iframe and link elements from an HTML string.
 *
 * Best-effort regular-expression filter for demonstration purposes; it is
 * not a substitute for a real HTML sanitizer.
 *
 * @param {string} str HTML source text to clean.
 * @returns {string} The text with the listed elements (and any whitespace
 *     directly following them) removed.
 */
function kickBug(str) {
  // Opening tag, shortest possible content, matching closing tag. The lazy
  // quantifier keeps harmless markup that sits between two separate elements.
  // The lookahead makes sure the tag name really ends here, so a tag such as
  // "scripted" is not mistaken for "script".
  var PAIRED = /<(script|style|iframe)(?=[\s\/>])[^>]*>[\s\S]*?<\/\1(?=[\s\/>])[^>]*>\s*/gi;
  // Left-over opening or closing tags. This also covers "link", which is
  // normally written without a closing tag.
  var STRAY = /<\/?(?:script|style|iframe|link)(?=[\s\/>])[^>]*>\s*/gi;

  var cleaned = str;
  var previous;
  // Repeat until nothing changes: removing one tag can splice the text around
  // it into a new tag. Every pass that changes the text shortens it, so the
  // loop terminates.
  do {
    previous = cleaned;
    cleaned = cleaned.replace(PAIRED, "").replace(STRAY, "");
  } while (cleaned !== previous);
  return cleaned;
}

// Bind this demo's button to this demo's filter. The filter is captured now,
// because a later script on the same page may redefine the global kickBug.
(function (sanitize) {
  var input = document.getElementById("bug1");
  document.getElementById("kick1").onclick = function () {
    // A textarea exposes its current (possibly user-edited) text as .value.
    input.value = sanitize(input.value);
  };
})(kickBug);
</script>
<textarea id="bug2" cols="80" rows="5"> <a onclick="test(); test1()" onblur= "test3()">test</a> </textarea>
<button id="kick2">抓虫2</button>
<script>
/**
 * Demo 2: remove every attribute whose name starts with "on" (inline event
 * handlers) from the opening tags in an HTML string.
 *
 * Best-effort regular-expression filter for demonstration purposes; it is
 * not a substitute for a real HTML sanitizer.
 *
 * @param {string} str HTML source text to clean.
 * @returns {string} The text with on* attributes removed; all other
 *     attributes are left exactly as written.
 */
function kickBug(str) {
  // An opening tag up to, but not including, its closing ">". Quoted values
  // are consumed as a unit so a ">" inside a handler does not end the tag.
  var OPEN_TAG = /<([a-z][^\s\/>]*)((?:"[^"]*"|'[^']*'|[^>])*)/gi;
  // One attribute: leading separators, the name, and an optional "= value".
  // Walking the tag attribute by attribute means only real attribute NAMES
  // are tested, so "content=..." or "font=..." are never cut in half.
  var ATTRIBUTE = /[\s\/]*([^\s\/=>"']+)(?:\s*=\s*(?:"[^"]*"|'[^']*'|[^\s>]*))?/g;

  return str.replace(OPEN_TAG, function (tag, tagName, attrs) {
    var kept = attrs.replace(ATTRIBUTE, function (attr, attrName) {
      // Dropping the attribute together with its LEADING separator only keeps
      // the separator of the next attribute, so the tag name and the
      // remaining attributes are never glued together.
      return /^on/i.test(attrName) ? "" : attr;
    });
    return "<" + tagName + kept;
  });
}

// Bind this demo's button to this demo's filter. The filter is captured now,
// because a later script on the same page may redefine the global kickBug.
(function (sanitize) {
  var input = document.getElementById("bug2");
  document.getElementById("kick2").onclick = function () {
    // A textarea exposes its current (possibly user-edited) text as .value.
    input.value = sanitize(input.value);
  };
})(kickBug);
</script>
<textarea id="bug3" cols="80" rows="5"> <a onclick="test();" href=" jAvascript:alert('a')" href="jAvascript:" href="vbscript:alert()" >test</a> </textarea>
<button id="kick3">抓虫3</button>
<script>
/**
 * Demo 3: remove URL-carrying attributes (href, src, ...) whose value uses
 * the javascript: or vbscript: pseudo-protocol.
 *
 * Best-effort regular-expression filter for demonstration purposes; it is
 * not a substitute for a real HTML sanitizer.
 *
 * @param {string} str HTML source text to clean.
 * @returns {string} The text with script-URL attributes removed; all other
 *     attributes are left exactly as written.
 */
function kickBug(str) {
  // An opening tag up to, but not including, its closing ">". Quoted values
  // are consumed as a unit so a ">" inside a value does not end the tag.
  var OPEN_TAG = /<([a-z][^\s\/>]*)((?:"[^"]*"|'[^']*'|[^>])*)/gi;
  // One attribute: leading separators, name (group 1), optional value (group 2).
  var ATTRIBUTE = /[\s\/]*([^\s\/=>"']+)(?:\s*=\s*("[^"]*"|'[^']*'|[^\s>]*))?/g;
  // Attributes whose value is treated as a URL.
  var URL_ATTRIBUTE = /^(?:href|src|action|formaction|background|data|poster|xlink:href)$/i;
  var NAMED_ENTITIES = { colon: ":", tab: "\t", newline: "\n" };

  // Decode decimal and hexadecimal character references plus the few named
  // ones that can hide a scheme. Used for detection only; the decoded text is
  // never written back to the output.
  function decodeEntities(text) {
    return text.replace(
      /&(?:#x([0-9a-f]+);?|#([0-9]+);?|(colon|tab|newline);)/gi,
      function (entity, hex, dec, named) {
        if (named) {
          return NAMED_ENTITIES[named.toLowerCase()];
        }
        var code = hex ? parseInt(hex, 16) : parseInt(dec, 10);
        // String.fromCharCode wraps values above 0xFFFF; leave those alone.
        return code <= 0xFFFF ? String.fromCharCode(code) : entity;
      }
    );
  }

  // True when the raw attribute value resolves to a script pseudo-protocol.
  function isScriptUrl(rawValue) {
    var value = rawValue.replace(/^(["'])([\s\S]*)\1$/, "$2");
    // Whitespace and control characters are dropped before the comparison so
    // they cannot be used to split up or pad the scheme name.
    value = decodeEntities(value).replace(/[\x00-\x20]+/g, "");
    return /^(?:javascript|vbscript):/i.test(value);
  }

  return str.replace(OPEN_TAG, function (tag, tagName, attrs) {
    var kept = attrs.replace(ATTRIBUTE, function (attr, attrName, rawValue) {
      if (!rawValue || !URL_ATTRIBUTE.test(attrName)) {
        return attr;
      }
      return isScriptUrl(rawValue) ? "" : attr;
    });
    return "<" + tagName + kept;
  });
}

// Bind this demo's button to this demo's filter. The filter is captured now,
// because a later script on the same page may redefine the global kickBug.
(function (sanitize) {
  var input = document.getElementById("bug3");
  document.getElementById("kick3").onclick = function () {
    // A textarea exposes its current (possibly user-edited) text as .value.
    input.value = sanitize(input.value);
  };
})(kickBug);
</script>
<textarea id="bug4" cols="80" rows="5"> expression() <a style="color:expression( 'red' )">test</a> </textarea>
<button id="kick4">抓虫4</button>
<script>
/**
 * Demo 4: remove style attributes that contain a CSS expression.
 *
 * Best-effort regular-expression filter for demonstration purposes; it is
 * not a substitute for a real HTML sanitizer.
 *
 * @param {string} str HTML source text to clean.
 * @returns {string} The text with expression-carrying style attributes
 *     removed; harmless style attributes and all other attributes are left
 *     exactly as written.
 */
function kickBug(str) {
  // An opening tag up to, but not including, its closing ">". Quoted values
  // are consumed as a unit so a ">" inside a value does not end the tag.
  var OPEN_TAG = /<([a-z][^\s\/>]*)((?:"[^"]*"|'[^']*'|[^>])*)/gi;
  // One attribute: leading separators, name (group 1), optional value (group 2).
  var ATTRIBUTE = /[\s\/]*([^\s\/=>"']+)(?:\s*=\s*("[^"]*"|'[^']*'|[^\s>]*))?/g;

  // Decode decimal and hexadecimal character references. Used for detection
  // only; the decoded text is never written back to the output.
  function decodeEntities(text) {
    return text.replace(/&#(?:x([0-9a-f]+)|([0-9]+));?/gi, function (entity, hex, dec) {
      var code = hex ? parseInt(hex, 16) : parseInt(dec, 10);
      // String.fromCharCode wraps values above 0xFFFF; leave those alone.
      return code <= 0xFFFF ? String.fromCharCode(code) : entity;
    });
  }

  // Resolve CSS backslash escapes: a backslash followed by 1-6 hex digits and
  // one optional whitespace character, or a backslash followed by any char.
  function decodeCssEscapes(text) {
    return text.replace(
      /\\(?:([0-9a-f]{1,6})[ \t\r\n\f]?|([\s\S]))/gi,
      function (escape, hex, literal) {
        if (!hex) {
          return literal;
        }
        var code = parseInt(hex, 16);
        return code <= 0xFFFF ? String.fromCharCode(code) : escape;
      }
    );
  }

  // True when the raw style value contains the word "expression" once
  // entities, CSS comments, escapes and NUL characters are out of the way.
  function hasExpression(rawValue) {
    var css = rawValue.replace(/^(["'])([\s\S]*)\1$/, "$2");
    css = decodeEntities(css).replace(/\/\*[\s\S]*?\*\//g, "");
    css = decodeCssEscapes(css).replace(/\x00/g, "");
    return /expression/i.test(css);
  }

  return str.replace(OPEN_TAG, function (tag, tagName, attrs) {
    var kept = attrs.replace(ATTRIBUTE, function (attr, attrName, rawValue) {
      if (!rawValue || !/^style$/i.test(attrName)) {
        return attr;
      }
      return hasExpression(rawValue) ? "" : attr;
    });
    return "<" + tagName + kept;
  });
}

// Bind this demo's button to this demo's filter. The filter is captured now,
// because a later script on the same page may redefine the global kickBug.
(function (sanitize) {
  var input = document.getElementById("bug4");
  document.getElementById("kick4").onclick = function () {
    // A textarea exposes its current (possibly user-edited) text as .value.
    input.value = sanitize(input.value);
  };
})(kickBug);
</script>
把上面四个 kickBug 函数依次改名为 k1、k2、k3、k4 之后,这样调用就可以:
k1(k2(k3(k4(str))))
这样就是单纯地过滤脚本而已,所谓过滤“危险脚本”应该是能够判断哪些属于“危险”脚本,不危险的就不过滤才对……那可就难办了,相当于防火墙了。