Commit d09d88dac6305786882d0e6e12371c1b2fe51b1e
Creating a new API
- /SWeeText for fetching a requested page for annotation.
7 | from logging import FileHandler | 7 | from logging import FileHandler |
---|
8 | import pymongo | 8 | import pymongo |
---|
9 | import os | 9 | import os |
---|
| | 10 | import lxml.html |
---|
| | 11 | import urllib2 |
---|
| | 12 | import StringIO |
---|
10 | | 13 | |
---|
11 | app = Flask(__name__) | 14 | app = Flask(__name__) |
---|
12 | | 15 | |
---|
… | | … | |
---|
36 | x = x + 1 | 36 | x = x + 1 |
---|
37 | return jsonify(ret) | 37 | return jsonify(ret) |
---|
38 | | 38 | |
---|
39 | | | |
---|
| | 39 | @app.route('/SWeeText',methods=['GET']) |
---|
| | 40 | def SWeeText(): |
---|
| | 41 | if request.args.has_key('url'): |
---|
| | 42 | myhandler1 = urllib2.Request(request.args['url'], headers={'User-Agent': "Mozilla/5.0(X11; U; Linux i686) Gecko/20071127 Firefox/2.0.0.11"}) |
---|
| | 43 | a = urllib2.urlopen(myhandler1) |
---|
| | 44 | page = a.read() |
---|
| | 45 | a.close() |
---|
| | 46 | try: |
---|
| | 47 | page = unicode(page,'utf-8') |
---|
| | 48 | except UnicodeDecodeError: |
---|
| | 49 | pass |
---|
| | 50 | root = lxml.html.parse(StringIO.StringIO(page)).getroot() |
---|
| | 51 | root.make_links_absolute(request.args['url'], resolve_base_href = True) |
---|
| | 52 | return lxml.html.tostring(root) |
---|
40 | #Log the errors, don't depend on apache to log it for you. | 53 | #Log the errors, don't depend on apache to log it for you. |
---|
41 | fil = FileHandler(os.path.join(os.path.dirname(__file__), 'logme'),mode='a') | 54 | fil = FileHandler(os.path.join(os.path.dirname(__file__), 'logme'),mode='a') |
---|
42 | fil.setLevel(logging.ERROR) | 55 | fil.setLevel(logging.ERROR) |
---|