In Python, how do I get the final URL that a short link points to? There are many Taobao short links now, and I need to get the final URL that each short link redirects to. Is there a good way to do this? Some short links use an HTTP 302 redirect, and some redirect directly in the web page using JavaScript. How can I resolve both kinds?
黄舟2017-05-19 10:09:33
Use Selenium + PhantomJS...
http://stackoverflow.com/ques...
#!/usr/bin/python2.7
from twisted.internet import reactor
from twisted.internet.defer import Deferred, DeferredList, DeferredLock
from twisted.internet.defer import inlineCallbacks
from twisted.web.client import Agent, HTTPConnectionPool
from twisted.web.http_headers import Headers
from pprint import pprint
from collections import defaultdict
from urlparse import urlparse
from random import randrange
import fileinput
# Shared module state used by the helpers below.
# url -> value of the first Location header, or the error text on failure.
locations = {}
# Maps a key to a DeferredLock, creating the lock on first access.
locks = defaultdict(DeferredLock)

# A single connection pool shared by every request made through `agent`.
pool = HTTPConnectionPool(reactor)
pool.maxPersistentPerHost = 16
agent = Agent(reactor, pool)
def getLock(url, simultaneous = 1):
    """Pick one of the per-host locks guarding requests to ``url``.

    The lock is looked up in the module-level ``locks`` mapping under the
    key ``(netloc, slot)``, where ``netloc`` is the network location parsed
    from ``url`` and ``slot`` is drawn at random from
    ``range(simultaneous)``.  Each host can therefore end up with at most
    ``simultaneous`` distinct locks.
    """
    host = urlparse(url).netloc
    slot = randrange(simultaneous)
    return locks[host, slot]
@inlineCallbacks
def getMapping(url):
    """Record where ``url`` redirects to in the module-level ``locations``.

    Sends a HEAD request for ``url`` through ``agent`` while holding one of
    the host's locks.  On success ``locations[url]`` is set to the first
    ``location`` response header, or ``None`` when the header is absent.
    If anything raises, ``locations[url]`` is set to the exception's string
    form instead.  The lock is released in either case.
    """
    # Limit ourselves to 4 simultaneous connections per host.
    # Tweak this as desired, but make sure that it is no larger than
    # pool.maxPersistentPerHost.
    host_lock = getLock(url, 4)
    yield host_lock.acquire()
    try:
        response = yield agent.request('HEAD', url)
        # Default of [None] makes a missing header resolve to None.
        redirect_targets = response.headers.getRawHeaders('location', [None])
        locations[url] = redirect_targets[0]
    except Exception as err:
        # Best effort: keep the failure text rather than propagating it.
        locations[url] = str(err)
    finally:
        host_lock.release()
And you can try the pip package
https://pypi.python.org/pypi/...
from urlunshort import resolve
resolve("http://bit.ly/qlKaI")  # result: 'http://bitbucket.org/runeh/urlunshort/'