09月22日, 2014 1914次
# Python 2 script: download attachments whose URLs end in consecutive ids.
import os
import shutil
import urllib
import urllib2
from contextlib import closing
from os.path import basename
from urllib import unquote
from urlparse import urlsplit


def _name_from_response(response):
    """Return the file name to use for an open ``urllib`` response.

    A ``filename=`` value in the Content-Disposition header wins: quotes are
    stripped and the value is URL-unquoted and decoded as UTF-8.  Without
    one, the last path segment of the response's final URL is used (this is
    where a redirect ends up).  The result is reduced to its base name so a
    server-supplied name cannot carry directory components.
    """
    disposition = response.info().getheader('Content-Disposition') or ''
    if 'filename=' in disposition:
        raw = disposition.split('filename=')[1]
        raw = raw.replace('"', '').replace("'", "")
        name = unquote(raw).decode('utf8')
    else:
        name = urlsplit(response.geturl())[2]
    return basename(name)


def _console_text(name):
    """Return *name* as a byte string that is safe to ``print``.

    Unicode names are encoded as GBK, dropping characters GBK cannot
    represent.  Byte strings are returned unchanged: calling ``.encode`` on
    a non-ASCII byte string makes Python 2 decode it as ASCII first and fail.
    """
    if isinstance(name, unicode):
        return name.encode("GBK", 'ignore')
    return name


def download(url, opi, passName=None):
    """Download *url* to disk and print the name of the saved file.

    Args:
        url: Address to fetch.
        opi: Value whose ``str()`` is prefixed to the detected file name.
        passName: Optional explicit destination path.  When given, the
            content is saved there directly; otherwise it is saved as
            ``./files/<opi><detected name>``.
    """
    if passName:
        fileName = passName
        # The original passed an undefined name here instead of ``url``.
        urllib.urlretrieve(url, fileName)
    else:
        with closing(urllib.urlopen(url)) as r:
            fileName = _name_from_response(r)
            if not os.path.isdir('./files/'):
                os.makedirs('./files/')
            with open('./files/' + str(opi) + fileName, 'wb') as f:
                # Stream in chunks instead of holding the whole body in memory.
                shutil.copyfileobj(r, f)
    print("File: %s downloaded" % _console_text(fileName))


op_url = ""
# Blank in this copy of the script: set it to the URL prefix that the numeric
# id is appended to.
op_url_base = ""

# Scratch notes carried over from the original source (presumably earlier
# id ranges and run timestamps -- unverified):
#   763806 20170910-1751
#   for i in range(763549,769999):
#   201709111217-764103
#   760000,760698
#   760698,763549
#   760549-760690
#   750000,757966

for i in range(776810, 776910):
    # for i in range(763549,763550):
    op_url = op_url_base + str(i) + ""
    print(op_url)
    download(op_url, i)
改进一下
import os
import shutil
import urllib2
from contextlib import closing
from os.path import basename
from time import time
from urllib import unquote
from urlparse import urlsplit


def _attachment_name(response):
    """Return the file name to use for an open ``urllib2`` response.

    A ``filename=`` value in the Content-Disposition header wins: quotes are
    stripped and the value is URL-unquoted and decoded as UTF-8.  Without
    one, the last path segment of the response's final URL is used.  The
    result is reduced to its base name so a server-supplied name cannot
    carry directory components.
    """
    disposition = response.info().getheader('Content-Disposition') or ''
    if 'filename=' in disposition:
        raw = disposition.split('filename=')[1]
        raw = raw.replace('"', '').replace("'", "")
        name = unquote(raw).decode('utf8')
    else:
        name = urlsplit(response.geturl())[2]
    return basename(name)


def download2(url, opi, passName=None):
    """Download *url* with browser-like headers and print the elapsed time.

    Args:
        url: Address to fetch.
        opi: Value whose ``str()`` is prefixed to the detected file name.
        passName: Optional explicit destination path.  When given, the
            content is saved there directly; otherwise it is saved as
            ``./files/<opi><detected name>``.
    """
    started = time()

    request = urllib2.Request(url)
    request.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko')
    request.add_header('Referer', 'https://xxx/')
    # NOTE(review): hard-coded session cookie, presumably copied from one
    # browser session and likely to expire -- confirm, and keep live
    # credentials out of source control.
    request.add_header('Cookie', 'ASP.NET_SessionId=qfhwwnbglzwos5gjnvn4hq3p; ImageCheck=6DV7')

    with closing(urllib2.urlopen(request)) as r:
        if passName:
            # The original called a non-existent urllib2.urlretrieve with an
            # undefined URL; fetch through the same request instead so the
            # headers above still apply.
            fileName = passName
            target = passName
        else:
            fileName = _attachment_name(r)
            if not os.path.isdir('./files/'):
                os.makedirs('./files/')
            target = './files/' + str(opi) + fileName
        with open(target, 'wb') as f:
            # Stream in chunks instead of holding the whole body in memory.
            shutil.copyfileobj(r, f)

    # Only unicode names need encoding; .encode on a non-ASCII byte string
    # would trigger an implicit ASCII decode in Python 2 and fail.
    if isinstance(fileName, unicode):
        shown = fileName.encode("GBK", 'ignore')
    else:
        shown = fileName
    print("File: %s downloaded" % shown)
    print(time() - started)


op_url = "x"
# "x" is a placeholder in this copy of the script: set it to the URL prefix
# that the numeric id is appended to.
op_url_base = "x"
i = 750046
op_url = op_url_base + str(i) + ""
print(op_url)
download2(op_url, i)
暂无留言,赶快评论吧