Home  >  Article  >  Backend Development  >  Python实现的下载8000首儿歌的代码分享

Python实现的下载8000首儿歌的代码分享

WBOY
WBOYOriginal
2016-06-10 15:18:531325browse

下载8000首儿歌的python的代码:

复制代码 代码如下:

#-*- coding: UTF-8 -*-

from pyquery import PyQuery as py
from lxml import etree
import urllib
import re
import os
import sys
import logging

def format(filename):
    tuple=(' ',''','\'')
    for char in tuple:
        if (filename.find(char)!=-1):
            filename=filename.replace(char,"_")
    return filename

def download_mp3(mp3_url, filename,dir):      
    f = dir+"\\"+filename
    if os.path.exists(f):
      logger.debug(f+" is existed.")
      return
     
    try:
        open(f, 'wb').write(urllib.urlopen(mp3_url).read())
        logger.debug(  filename + ' is downloaded.')
    except:
        logger.debug( filename + ' is not downloaded.')

       
def download_all_mp3(start,end,dir,logger):
  for x in range(start,end):
    try:
        url = "http://www.youban.com/mp3-d" + str(x) + ".html"
        logger.debug(str(x) + ": "+url)
        doc = py(url=url)
        e = doc('.mp3downloadbox')
        if e is None or e == '':
          logger.debug(url+" is not existed.")
          return
         
        e = unicode(e)
        #logger.debug( e)
        regex = re.compile(ur".*

(.*)

.*downloadboxlist.*?         m = regex.search(e)
        if m is not None:
          title = m.group(1).strip()
          title2 = str(x)+"_"+title + ".mp3"
          #title2 = re.sub(' ','_',title2)
          title2 = format(title2)
          link = m.group(2)
          #logger.debug( "title:" + title + " link:" + link)
          if link == '' or title == '':
            logger.debug(url + " is not useful")
            continue
          logger.debug(str(x)+": "+link)
          download_mp3(link,title2,dir)
    except:
        logger.debug(url+" met exception.")
        continue
     

     
if __name__ == "__main__":
    dir_root = "e:\\song"
    if sys.argv[3] != '': dir_root=sys.argv[3]
   
    start,end = 1,8000
    if sys.argv[1] >= 0 and sys.argv[2]>=0:
      start,end = int(sys.argv[1]),int(sys.argv[2])
      print ("Download from %s to %s.\n" % (start,end))     
   
    dir = dir_root + "\\"+str(start)+"-"+str(end)
    if not os.path.exists(dir):
      os.mkdir(dir)    
    print "Download to " + dir + ".\n"
   
    logger = logging.getLogger("simple")
    logger.setLevel(logging.DEBUG) 
    fh = logging.FileHandler(dir+"\\"+"download.log")
    ch = logging.StreamHandler()
    formatter = logging.Formatter("%(message)s")
    ch.setFormatter(formatter)
    fh.setFormatter(formatter)
    logger.addHandler(ch)
    logger.addHandler(fh)
    download_all_mp3(start,end,dir,logger)

有需要的可以参考继续修改。

Statement:
The content of this article is voluntarily contributed by netizens, and the copyright belongs to the original author. This site does not assume corresponding legal responsibility. If you find any content suspected of plagiarism or infringement, please contact admin@php.cn