Replacing absolute URLs with relative URLs in Python

Posted 3 years ago, written in Python, viewed 86 times.
URL https://pastebin.vip/view/5ca359ab
  1. #!/usr/bin/env python
  2. #-*- coding:utf-8 -*-
  3. #
  4. #
  5. # author  : cold night
  6. # email   : wh_linux@126.com
  7. #
  8.  
  9. import pymongo
  10. import re
  11. from StringIO import StringIO
  12.  
  13. conn = pymongo.Connection()
  14. db = conn.test
  15.  
  16. def replace_url():
  17.     regex = re.compile(r'([href¦src])=["¦\']http://.*?(/m/getimg\?.*?)["¦\']')
  18.     results = db['test'].find()
  19.     db_coll = db['test']
  20.     def replace(r):
  21.         content = r.get('content')
  22.         if not content: return
  23.         content = StringIO(content)
  24.         content.seek(0)
  25.         result = StringIO()
  26.         for line in content.readlines():
  27.             t = regex.sub(r'\1="\2"', line)
  28.             result.write(t)
  29.  
  30.         result.seek(0)
  31.         content = result.read()
  32.         if content:
  33.             r['content'] = content
  34.         _id = r.get('_id')
  35.         db_coll.update({'_id':_id}, r)
  36.  
  37.     results = [replace(i) for i in results]
  38.  
  39. if __name__=="__main__":replace_url()
  40. #//python/5323

Reply to "Replacing absolute URLs with relative URLs in Python"

Here you can reply to the paste above

captcha

https://burned.cc - Burn After Reading Website