Rumah > Soal Jawab > teks badan
def replace_real_url(html, curr_url):
    """
    Replace the relative href/src paths found in HTML tags with absolute URLs.

    Every tag that carries an ``href`` or ``src`` attribute is handed to
    ``replace_real_url_callback``, which decides whether and how to rewrite it.

    :param html: HTML text to process; returned untouched when it is falsy.
    :param curr_url: URL of the page the HTML came from, used as the base for
        resolving relative paths; when falsy, ``html`` is returned untouched.
    :return: the HTML as text with the matched tags rewritten, or the original
        ``html`` argument when either argument is falsy.
    """
    if not (html and curr_url):
        return html

    # ``unicode`` only exists on Python 2; fall back to ``str`` elsewhere.
    try:
        text_type = unicode
    except NameError:
        text_type = str

    # group 1: tag name, group 2: attribute name, group 3: attribute value.
    pattern = r'<([a-z]{1,5})[^><]*(href|src)=["\']{0,1}([^"\']+)["\']{0,1}[^><]*>'

    # The flags must be passed by keyword: the fourth positional argument of
    # re.sub is ``count``, so passing ``re.I | re.M`` there silently capped the
    # number of substitutions and left the match case-sensitive.
    # (re.M has no effect on this pattern, which has no ^/$ anchors; it is
    # kept only to preserve the flags the original intended.)
    return re.sub(
        pattern,
        lambda match: replace_real_url_callback(match, curr_url),
        text_type(html),
        flags=re.I | re.M
    )
def replace_real_url_callback(repl, curr_url):
    """
    Perform the replacement for a single matched tag.

    :param repl: regex match object whose group 1 is the tag name and whose
        group 3 is the href/src value (the pattern built in replace_real_url).
    :param curr_url: base URL the captured value is resolved against.
    :return: the matched tag text; for ``a`` and ``img`` tags the captured URL
        is replaced by its absolute form, every other tag is returned as is.
    """
    tag_text = repl.group()
    if repl.lastindex != 3 or repl.group(1).lower() not in ('a', 'img'):
        return tag_text

    absolute_url = urljoin(curr_url, repl.group(3))

    # Splice the new URL in by position instead of running a second re.sub:
    # a textual substitution would also rewrite other occurrences of the same
    # text inside the tag (e.g. the tag name in <a href="a">) and would treat
    # backslashes in the URL as replacement-template escapes.
    offset = repl.start()
    value_start = repl.start(3) - offset
    value_end = repl.end(3) - offset
    return tag_text[:value_start] + absolute_url + tag_text[value_end:]
如何修正这个替换方法？
PHP中文网2017-04-18 10:31:10
# Re-emit capture groups 1 and 3 around the new value so that only the URL
# (group 2) is replaced; a replacement without backreferences would replace
# the whole <img> tag.
print(re.sub('(<img src=")(.+?)(" />)', r'\1aa\3', 'aa<img src="/aaa.jpg" />bb'))
# aa<img src="aa" />bb