From d09d88dac6305786882d0e6e12371c1b2fe51b1e Mon Sep 17 00:00:00 2001
From: Arvind
Date: Thu, 28 Mar 2013 15:18:58 +0530
Subject: [PATCH] Creating a new API - /SWeeText for fetching a requested page for annotation.

---
Review note: the added SWeeText view was revised after the original commit
(explicit 400 responses, http/https-only URLs, guaranteed close of the
response). The commit and index hashes below are those of the original commit.

 fetch.py |   43 ++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 42 insertions(+), 1 deletion(-)

diff --git a/fetch.py b/fetch.py
index 26b6518..25f4925 100644
--- a/fetch.py
+++ b/fetch.py
@@ -7,6 +7,9 @@ import logging
 from logging import FileHandler
 import pymongo
 import os
+import lxml.html
+import urllib2
+import StringIO
 
 app = Flask(__name__)
 
@@ -33,7 +36,45 @@ def fetch():
         x = x + 1
     return jsonify(ret)
 
-
+@app.route('/SWeeText', methods=['GET'])
+def SWeeText():
+    """Fetch the page named by the ``url`` query argument for annotation.
+
+    The page is downloaded with a browser-like User-Agent, parsed with lxml,
+    has its links made absolute against the requested URL, and is returned
+    serialized as HTML.
+
+    Responds with 400 when ``url`` is missing or is not an http(s) URL.
+    """
+    url = request.args.get('url')
+    if not url:
+        # A view that falls through returns None, which Flask turns into a
+        # 500; report the client error explicitly instead.
+        return 'Missing required query parameter: url', 400
+    if not url.lower().startswith(('http://', 'https://')):
+        # urllib2 also opens file:// and ftp:// URLs; a client-supplied URL
+        # must never be able to read local files.
+        # NOTE(review): http(s) URLs can still reach internal hosts (SSRF).
+        return 'Only http and https URLs are supported', 400
+
+    page_request = urllib2.Request(url, headers={
+        'User-Agent': "Mozilla/5.0(X11; U; Linux i686) Gecko/20071127 Firefox/2.0.0.11"})
+    response = urllib2.urlopen(page_request)
+    try:
+        page = response.read()
+    finally:
+        # Release the connection even when read() fails part-way.
+        response.close()
+    try:
+        page = unicode(page, 'utf-8')
+    except UnicodeDecodeError:
+        # Not valid UTF-8: leave the raw bytes for lxml to handle.
+        pass
+    root = lxml.html.parse(StringIO.StringIO(page)).getroot()
+    root.make_links_absolute(url, resolve_base_href=True)
+    return lxml.html.tostring(root)
+
+
 #Log the errors, don't depend on apache to log it for you.
 fil = FileHandler(os.path.join(os.path.dirname(__file__), 'logme'),mode='a')
 fil.setLevel(logging.ERROR)
-- 
1.7.10.4