Python downloads all the pictures of the specified URL

From , 5 Years ago, written in Python, viewed 193 times.
URL https://pastebin.vip/view/682e0e79
  1. #coding=utf-8
  2. #download pictures of the url
  3. #usage: python downpicture.py www.baidu.com
  4.  
  5. import os
  6. import sys
  7. from html.parser import HTMLParser
  8. from urllib.request import urlopen
  9. from urllib.parse import urlparse
  10.  
  11. def getpicname(path):
  12.     '''    retrive filename of url        '''
  13.     if os.path.splitext(path)[1] == '':
  14.         return None
  15.     pr=urlparse(path)
  16.     path='http://'+pr[1]+pr[2]
  17.     return os.path.split(path)[1]
  18.  
  19. def saveimgto(path, urls):
  20.     '''
  21.    save img of url to local path
  22.    '''
  23.     if not os.path.isdir(path):
  24.         print('path is invalid')
  25.         sys.exit()
  26.     else:
  27.         for url in urls:
  28.             of=open(os.path.join(path, getpicname(url)), 'w+b')
  29.             q=urlopen(url)
  30.             of.write(q.read())
  31.             q.close()
  32.             of.close()
  33.  
  34. class myhtmlparser(HTMLParser):
  35.     '''put all src of img into urls'''
  36.     def __init__(self):
  37.         HTMLParser.__init__(self)
  38.         self.urls=list()
  39.         self.num=0
  40.     def handle_starttag(self, tag, attr):
  41.         if tag.lower() == 'img':
  42.             srcs=[u[1] for u in attr if u[0].lower() == 'src']
  43.             self.urls.extend(srcs)
  44.             self.num = self.num+1
  45.  
  46. if __name__ == '__main__':
  47.     url=sys.argv[1]
  48.     if not url.startswith('http://'):
  49.         url='http://' + sys.argv[1]
  50.     parseresult=urlparse(url)
  51.     domain='http://' + parseresult[1]
  52.  
  53.     q=urlopen(url)
  54.     content=q.read().decode('utf-8', 'ignore')
  55.     q.close()
  56.  
  57.     myparser=myhtmlparser()
  58.     myparser.feed(content)
  59.  
  60.     for u in myparser.urls:
  61.         if (u.startswith('//')):
  62.             myparser.urls[myparser.urls.index(u)]= 'http:'+u
  63.         elif u.startswith('/'):
  64.             myparser.urls[myparser.urls.index(u)]= domain+u
  65.  
  66.     saveimgto(r'D:\python\song', myparser.urls)
  67.     print('num of download pictures is {}'.format(myparser.num))
  68.  
  69.  
  70. #//python/4446

Reply to "Python downloads all the pictures of the specified URL"

Here you can reply to the paste above

captcha

https://burned.cc - Burn After Reading Website