搜尋
首頁後端開發Python教學python实现爬虫下载漫画示例

代码如下:


#!/usr/bin/python3.2
import os,socket
import urllib
import urllib.request,threading,time
import re,sys
global manhuaweb,weburl,floder,chapterbegin,currentthreadnum,threadcount,mutex,mutex2

weburl=''
floder=''
chapterbegin=0
currentthreadnum=0
threadcount=6


if len(sys.argv)>=3:
  weburl=sys.argv[1]
  floder=sys.argv[2]
else:
    print("usag: downloadmanhua weburl floder chapterbegin=0 threadnnum=6")
    sys.exit(0)
if len(sys.argv)>=4:
  chapterbegin=int(sys.argv[3])
if len(sys.argv)>=5:
  threadcount=(int)(sys.argv[4])

 

def jin(i,jinzhi):
        finalans=""
        answer=i%jinzhi
        i=int(i/jinzhi)
        if answer>9:
                finalans=finalans+chr(ord('a')+(answer-10))
        else:
                finalans=finalans+str(answer)
        if i!=0:
                finalans=jin(i,jinzhi)+finalans
        return finalans
def urlparse(p,a,c,k):
        d={}
        e=lambda c:     jin(c,36)
        if 1:
                while c:
                        c=c-1
                        if not k[c]:
                                d[jin(c,36)]=jin(c,36)
                        else:
                                d[jin(c,36)]=k[c]
                k=[lambda e:d[e]]
                e=lambda c:'\\w+'
                c=1
        newstr=""
        while c:
                c=c-1
                if k[c]:
                        for i in range(0,len(p)):
                                tempi=p[i]
                                tempi=ord(tempi)
                                if tempi>=ord('a') and tempi                                        newstr+=d[chr(tempi)]
                                elif tempi>=ord('0') and tempi                                        newstr+=d[chr(tempi)]
                                else:
                                        newstr+=chr(tempi)
        return newstr
def meispower(s):
        p=re.compile(r"(?=\}\().*",re.IGNORECASE)
        s=p.findall(s)
        s=s[0]
        s=s[0:(len(s)-19)]
        par=s.split(',')
        par[3]=par[3][1:len(par[3])]
        answer=par[3].split('|')
        chapterpath=urlparse(par[0],int(par[1]),int(par[2]),answer)
        allurl=re.findall('imgpath=[^;]*',chapterpath)[0]
        allurl=allurl[10:(len(allurl)-2)]
        return allurl
def pictofile(weburl,filename,loop=100):
        if loop                print('can\'t download the picture %s'%weburl)
                return
        loop=loop-1
        if os.path.exists(filename):
            return
        try:
                url=urllib.request.urlopen(weburl)
                data=url.read()
                if len(data)                        url.close()
                        pictofile(weburl,filename,loop)
                else:
                        print('download from %s name is %s\n'%(weburl,filename))
                        myfile=open('%s'%filename,'wb')
                        myfile.write(data)
                        myfile.close()
                        url.close();
        except socket.timeout:
                print('timeout')
                pictofile(weburl,filename,loop)
        except Exception as e:
          print('error',e)
          pictofile(weburl,filename,loop)
        finally:
            pass
def downloadpic(url,loadpicdir,num):
    #download the all url picture to loadpicdir
    global currentthreadnum,mutex,mutex2
    mymode=re.compile(r'[0-9a-z.]*\Z')
    try:
                mutex2.acquire()
                os.chdir(loadpicdir)
                mutex2.release()
    except:
                print("can't open the floder %s will be create"%loadpicdir)
                try:
                    if(mutex2.locked()):
                        os.mkdir(loadpicdir)
                        os.chdir(loadpicdir)
                        mutex2.release()
                    print('create floder succeed')
                except:
                    print("can't create floder %s"%loadpicdir)
                    if(mutex.acquire()):
                        mutex.release()
                    quit(0)
    name=mymode.findall(url)
    filename='manhua'+name[0]
    pictofile(url,loadpicdir+'//'+str(num)+'-'+filename)
    mutex.acquire()
    currentthreadnum=currentthreadnum-1
    mutex.release()
def downloadchapter(url,loadpicdir,num,begin=0):
        global manhuaweb,threadcount,currentthreadnum,mutex
        print(manhuaweb+url)
        webdata=urllib.request.urlopen(manhuaweb+url).read()
        webdata=webdata.decode('UTF-8')
        chaptername=re.findall(r'

[^_]*',webdata)[0]<br>        chaptername=chaptername[7:len(chaptername)]<br>        webscrip=re.findall(r'eval.*[^]',webdata)<br>        chapterurl=meispower(webscrip[0]);<br>        chapterurl='http://mhimg.ali213.net'+chapterurl<br>        for i in range(begin,num):<br>                try:<br>                        while(currentthreadnum>=threadcount):<br>                                time.sleep(0.5)<br>                        mutex.acquire()<br>                        currentthreadnum=currentthreadnum+1<br>                        mutex.release()<br>                        threading.Thread(target=downloadpic,args=(r'%s%d.jpg'%(chapterurl,i),loadpicdir+chaptername,num)).start()<br>                except socket.error:<br>                        mutex.acquire()<br>                        i=i-1<br>                        currentthreadnum=currentthreadnum-1<br>                        mutex.release()<br>                except Exception as error:<br>                        print(error,'break')<br>                        print('download chapter %d of picture make a error'%i)<br>                        break<br>if __name__=='__main__':<br>        manhuaweb=r'http://manhua.ali213.net'<br>        socket.setdefaulttimeout(60.0)<br>        mutex=threading.Lock()<br>        mutex2=threading.Lock() <p>        <br>        webfile=urllib.request.urlopen(weburl)<br>        webdata=webfile.read();<br>        webdata=webdata.decode('UTF-8')<br>        meshmode=re.compile(r'</p> <div class="detail_body_right_sec_con">.*</div>')<br>        meshdata=meshmode.findall(webdata)[0]<br>        indexmode=re.compile(r'([0-9]*页)')<br>        indexdata=indexmode.findall(meshdata) <p>        picurlmode=re.compile(r'/comic/[0-9/]*.html')<br>        picurldata=picurlmode.findall(meshdata)</p> <p><br>        chapterlength=len(picurldata)<br>        nummode=re.compile(r'[\d]+')</p> <p>        i=chapterbegin<br>        while i<chapterlength:>                manhuachapter=picurldata[chapterlength-i-1]<br>                downloadchapter(manhuachapter,floder,int(nummode.findall(indexdata[chapterlength-i-1])[0]))<br>                i=i+1<br></chapterlength:></p>
陳述
本文內容由網友自願投稿,版權歸原作者所有。本站不承擔相應的法律責任。如發現涉嫌抄襲或侵權的內容,請聯絡admin@php.cn
Python:深入研究彙編和解釋Python:深入研究彙編和解釋May 12, 2025 am 12:14 AM

pythonisehybridmodeLofCompilation和interpretation:1)thepythoninterpretercompilesourcecececodeintoplatform- interpententbybytecode.2)thepythonvirtualmachine(pvm)thenexecutecutestestestestestesthisbytecode,ballancingEaseofuseEfuseWithPerformance。

Python是一種解釋或編譯語言,為什麼重要?Python是一種解釋或編譯語言,為什麼重要?May 12, 2025 am 12:09 AM

pythonisbothinterpretedAndCompiled.1)它的compiledTobyTecodeForportabilityAcrosplatforms.2)bytecodeisthenInterpreted,允許fordingfordforderynamictynamictymictymictymictyandrapiddefupment,儘管Ititmaybeslowerthananeflowerthanancompiledcompiledlanguages。

對於python中的循環時循環與循環:解釋了關鍵差異對於python中的循環時循環與循環:解釋了關鍵差異May 12, 2025 am 12:08 AM

在您的知識之際,而foroopsareideal insinAdvance中,而WhileLoopSareBetterForsituations則youneedtoloopuntilaconditionismet

循環時:實用指南循環時:實用指南May 12, 2025 am 12:07 AM

ForboopSareSusedwhenthentheneMberofiterationsiskNownInAdvance,而WhileLoopSareSareDestrationsDepportonAcondition.1)ForloopSareIdealForiteratingOverSequencesLikelistSorarrays.2)whileLeleLooleSuitableApeableableableableableableforscenarioscenarioswhereTheLeTheLeTheLeTeLoopContinusunuesuntilaspecificiccificcificCondond

Python:它是真正的解釋嗎?揭穿神話Python:它是真正的解釋嗎?揭穿神話May 12, 2025 am 12:05 AM

pythonisnotpuroly interpred; itosisehybridablectofbytecodecompilationandruntimeinterpretation.1)PythonCompiLessourceceCeceDintobyTecode,whitsthenexecececected bytybytybythepythepythepythonvirtirtualmachine(pvm).2)

與同一元素的Python串聯列表與同一元素的Python串聯列表May 11, 2025 am 12:08 AM

concatenateListSinpythonWithTheSamelements,使用:1)operatoTotakeEpduplicates,2)asettoremavelemavphicates,or3)listcompreanspherensionforcontroloverduplicates,每個methodhasdhasdifferentperferentperferentperforentperforentperforentperfornceandordorimplications。

解釋與編譯語言:Python的位置解釋與編譯語言:Python的位置May 11, 2025 am 12:07 AM

pythonisanterpretedlanguage,offeringosofuseandflexibilitybutfacingperformancelanceLimitationsInCricapplications.1)drightingedlanguageslikeLikeLikeLikeLikeLikeLikeLikeThonexecuteline-by-line,允許ImmediaMediaMediaMediaMediaMediateFeedBackAndBackAndRapidPrototypiD.2)compiledLanguagesLanguagesLagagesLikagesLikec/c thresst

循環時:您什麼時候在Python中使用?循環時:您什麼時候在Python中使用?May 11, 2025 am 12:05 AM

Useforloopswhenthenumberofiterationsisknowninadvance,andwhileloopswheniterationsdependonacondition.1)Forloopsareidealforsequenceslikelistsorranges.2)Whileloopssuitscenarioswheretheloopcontinuesuntilaspecificconditionismet,usefulforuserinputsoralgorit

See all articles

熱AI工具

Undresser.AI Undress

Undresser.AI Undress

人工智慧驅動的應用程序,用於創建逼真的裸體照片

AI Clothes Remover

AI Clothes Remover

用於從照片中去除衣服的線上人工智慧工具。

Undress AI Tool

Undress AI Tool

免費脫衣圖片

Clothoff.io

Clothoff.io

AI脫衣器

Video Face Swap

Video Face Swap

使用我們完全免費的人工智慧換臉工具,輕鬆在任何影片中換臉!

熱門文章

熱工具

記事本++7.3.1

記事本++7.3.1

好用且免費的程式碼編輯器

SublimeText3漢化版

SublimeText3漢化版

中文版,非常好用

禪工作室 13.0.1

禪工作室 13.0.1

強大的PHP整合開發環境

SublimeText3 Linux新版

SublimeText3 Linux新版

SublimeText3 Linux最新版

WebStorm Mac版

WebStorm Mac版

好用的JavaScript開發工具