Commit d09d88dac6305786882d0e6e12371c1b2fe51b1e
- Diff rendering mode:
- inline
- side by side
fetch.py
(17 / 1)
  | |||
7 | 7 | from logging import FileHandler | |
8 | 8 | import pymongo | |
9 | 9 | import os | |
10 | import lxml.html | ||
11 | import urllib2 | ||
12 | import StringIO | ||
10 | 13 | ||
11 | 14 | app = Flask(__name__) | |
12 | 15 | ||
… | … | ||
36 | 36 | x = x + 1 | |
37 | 37 | return jsonify(ret) | |
38 | 38 | ||
39 | |||
39 | @app.route('/SWeeText',methods=['GET']) | ||
40 | def SWeeText(): | ||
41 | if request.args.has_key('url'): | ||
42 | myhandler1 = urllib2.Request(request.args['url'], headers={'User-Agent': "Mozilla/5.0(X11; U; Linux i686) Gecko/20071127 Firefox/2.0.0.11"}) | ||
43 | a = urllib2.urlopen(myhandler1) | ||
44 | page = a.read() | ||
45 | a.close() | ||
46 | try: | ||
47 | page = unicode(page,'utf-8') | ||
48 | except UnicodeDecodeError: | ||
49 | pass | ||
50 | root = lxml.html.parse(StringIO.StringIO(page)).getroot() | ||
51 | root.make_links_absolute(request.args['url'], resolve_base_href = True) | ||
52 | return lxml.html.tostring(root) | ||
40 | 53 | #Log the errors, don't depend on apache to log it for you. | |
41 | 54 | fil = FileHandler(os.path.join(os.path.dirname(__file__), 'logme'),mode='a') | |
42 | 55 | fil.setLevel(logging.ERROR) |