python 文件批量下载

12月02日, 2017 · web开发 · 杂七杂八 · 哈二王 · 3826次

python 文件批量下载

09月22日, 2014 3826次

import urllib
import urllib2
from urllib import unquote
def download(url,opi, passName=None):
    """Download ``url`` to disk and print the name of the saved file.

    When ``passName`` is given, the resource is fetched with
    ``urllib.urlretrieve`` directly into that path.  Otherwise the response
    body is written to ``'./files/' + str(opi) + <name>``, where ``<name>``
    is taken from the ``filename=`` part of the ``Content-Disposition``
    header when present, and from the last path segment of the final URL
    otherwise.

    Args:
        url: Address of the resource to fetch.
        opi: Value whose ``str()`` is prefixed to the saved file name.
        passName: Optional explicit destination path.

    Raises:
        IOError: On network failure or when the output file cannot be
            opened (``./files/`` is not created by this function).

    Note:
        This is Python 2 code (``urllib.urlopen``, ``urlparse``,
        ``unicode``).
    """
    from os.path import basename
    from urlparse import urlsplit

    if passName:
        fileName = passName
        # The original passed an undefined name (``attachURL``) here, so this
        # branch always raised NameError; the address to fetch is ``url``.
        urllib.urlretrieve(url, fileName)
    else:
        r = urllib.urlopen(url)
        try:
            disposition = r.info().get('Content-Disposition') or ''
            if 'filename=' in disposition:
                fileName = disposition.split('filename=')[1]
                fileName = fileName.replace('"', '').replace("'", "")
                fileName = unquote(fileName).decode('utf8')
            else:
                # No usable header: name the file after the final URL, which
                # is the redirect target when a redirect happened.  Without
                # this fallback ``fileName`` was unbound for plain responses.
                fileName = basename(urlsplit(r.geturl())[2])
            # The name is chosen by the server; keep only its last path
            # component so it cannot point outside ./files/.
            fileName = basename(fileName.replace('\\', '/'))
            with open('./files/' + str(opi) + fileName, 'wb') as f:
                f.write(r.read())
        finally:
            # Release the connection even if reading or writing failed.
            r.close()
    # Only unicode names need encoding for a GBK console; calling .encode()
    # on a byte string would first decode it as ASCII and may raise.
    if isinstance(fileName, unicode):
        shown = fileName.encode("GBK", 'ignore')
    else:
        shown = fileName
    # Single-argument form: prints the same text as
    # ``print "File:", shown, "downloaded"`` under Python 2.
    print("File: %s downloaded" % shown)
    
    
'''
def download2(url,opi, passName=None):
    if passName:
        fileName = passName
        urllib2.urlretrieve(attachURL, fileName)
    else:
        
        request = urllib2.Request(url)
        request.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko')
        request.add_header('Referer', 'https://xxxxx')
        request.add_header('Cookie', 'ASP.NET_SessionId=qfhwwnbglzwos5gjnvn4hq3p; ImageCheck=6DV7')
        r = urllib2.urlopen(request)
        #response.headers['Content-Type']
        if r.headers().has_key('Content-Disposition'):
            fileName = r.info()['Content-Disposition'].split('filename=')[1]
            fileName = fileName.replace('"', '').replace("'", "")
            fileName=unquote(fileName).decode('utf8')
            print fileName
        elif r.url != url:
            # if we were redirected, the real file name we take from the final URL
            from os.path import basename
            from urlparse import urlsplit
            fileName = basename(urlsplit(r.url)[2])
        f = open(str(opi)+fileName, 'wb')
        f.write(r.read())
        f.close()
    print "File:", fileName,"downloaded"
    
        
    '''

 

# Base address that each numeric id is appended to (left blank in this post).
op_url_base = ""
op_url = ""

# Notes left over from earlier runs; they look like id ranges and timestamps
# of previous batches (unverified):
#   #763806 20170910-1751
#   for i in range(763549,769999):
#
#   201709111217-764103
#   760000,760698
#   760698,763549
#   760549-760690
#
#   750000,757966

# Fetch every id in the current range, one request per id.
for i in range(776810, 776910):
#for i in range(763549,763550):
    op_url = "".join([op_url_base, str(i), ""])
    print(op_url)
    download(op_url, i)

改进一下:为请求添加 User-Agent、Referer 和 Cookie 请求头,并统计每次下载的耗时。

def download2(url,opi, passName=None):
    """Download ``url`` with browser-like request headers and time the call.

    When ``passName`` is given, the resource is fetched with
    ``urllib.urlretrieve`` directly into that path and nothing is printed.
    Otherwise a ``urllib2`` request carrying ``User-Agent``, ``Referer`` and
    ``Cookie`` headers is sent, the body is written to
    ``'./files/' + str(opi) + <name>``, and the file name plus the elapsed
    seconds are printed.  ``<name>`` comes from the ``filename=`` part of
    the ``Content-Disposition`` header when present, and from the last path
    segment of the final URL otherwise.

    Args:
        url: Address of the resource to fetch.
        opi: Value whose ``str()`` is prefixed to the saved file name.
        passName: Optional explicit destination path.

    Raises:
        urllib2.URLError: On network or HTTP failure.
        IOError: When the output file cannot be opened (``./files/`` is not
            created by this function).

    Note:
        This is Python 2 code (``urllib2``, ``urlparse``).
    """
    from os.path import basename
    # ``time`` was called but never imported in the original.
    from time import time
    from urlparse import urlsplit

    t = time()
    if passName:
        fileName = passName
        # ``urllib2`` has no ``urlretrieve`` and ``attachURL`` was never
        # defined; fetch ``url`` with the ``urllib`` implementation instead.
        urllib.urlretrieve(url, fileName)
    else:
        request = urllib2.Request(url)
        request.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko')
        request.add_header('Referer', 'https://xxx/')
        # NOTE(review): hard-coded session cookie; it will expire and should
        # not live in source control. Consider passing it in from the caller.
        request.add_header('Cookie', 'ASP.NET_SessionId=qfhwwnbglzwos5gjnvn4hq3p; ImageCheck=6DV7')
        r = urllib2.urlopen(request)
        try:
            disposition = r.headers.get('Content-Disposition') or ''
            if 'filename=' in disposition:
                fileName = disposition.split('filename=')[1]
                fileName = fileName.replace('"', '').replace("'", "")
                fileName = unquote(fileName).decode('utf8')
            else:
                # A missing header used to raise KeyError; fall back to the
                # final URL's last path segment, as ``download`` does.
                fileName = basename(urlsplit(r.geturl())[2])
            # The name is chosen by the server; keep only its last path
            # component so it cannot point outside ./files/.
            fileName = basename(fileName.replace('\\', '/'))
            with open('./files/' + str(opi) + fileName, 'wb') as f:
                f.write(r.read())
        finally:
            # Release the connection even if reading or writing failed.
            r.close()
        # Only unicode names need encoding for a GBK console; a byte string
        # (URL fallback) is printed as is.
        if isinstance(fileName, unicode):
            shown = fileName.encode("GBK", 'ignore')
        else:
            shown = fileName
        # Single-argument forms print the same text as the Python 2
        # statements ``print "File:", shown, "downloaded"`` / ``print x``.
        print("File: %s downloaded" % shown)
        print(time() - t)
 

 

# "x" stands in for the real base address (presumably redacted in the post).
op_url_base = "x"
op_url = "x"
i = 750046

# Build the address for this one id and fetch it with the header-aware
# downloader.
op_url = "".join([op_url_base, str(i), ""])
print(op_url)
download2(op_url, i)

版权属于: 热心少年

原文地址: http://blog.rxsn.cn/post/60.html

转载时必须以链接形式注明原始出处及本声明。

python 文件批量下载

python 文件批量下载

暂无留言,赶快评论吧

欢迎留言 取消回复

python 文件批量下载

python 文件批量下载

暂无留言,赶快评论吧

欢迎留言 取消回复

欢迎留言 取消回复