Commit d09d88dac6305786882d0e6e12371c1b2fe51b1e

  • avatar
  • arvind
  • Thu Mar 28 15:18:58 IST 2013
Creating a new API
				 - /SWeeText for fetching a requested page for annotation.
fetch.py
(17 / 1)
  
77from logging import FileHandler
88import pymongo
99import os
10import lxml.html
11import urllib2
12import StringIO
1013
1114app = Flask(__name__)
1215
3636 x = x + 1
3737 return jsonify(ret)
3838
39
@app.route('/SWeeText', methods=['GET'])
def SWeeText():
    """Fetch the page named by the ``url`` query parameter for annotation.

    The page is downloaded with a browser-like User-Agent, parsed with
    lxml, and all of its links are rewritten to absolute URLs (resolved
    against the requested URL, honouring any ``<base href>``) before the
    document is serialized back to HTML.

    Query parameters:
        url: Absolute http(s) URL of the page to fetch.

    Returns:
        The serialized HTML of the fetched page, or a ``(message, 400)``
        tuple when ``url`` is missing, empty, or not an http(s) URL.

    Raises:
        urllib2.URLError: If the remote page cannot be retrieved.
    """
    url = request.args.get('url')
    if not url:
        # Previously the view fell through and returned None here, which
        # Flask reports as a server error rather than a client error.
        return "Missing required query parameter 'url'.", 400
    # SECURITY: the URL is caller-controlled and urllib2 will also open
    # file:// and ftp:// URLs, so restrict fetching to web schemes.
    if not url.lower().startswith(('http://', 'https://')):
        return "Only http and https URLs can be fetched.", 400

    fetch_request = urllib2.Request(
        url,
        headers={'User-Agent': "Mozilla/5.0(X11; U; Linux i686) Gecko/20071127 Firefox/2.0.0.11"})
    response = urllib2.urlopen(fetch_request)
    try:
        page = response.read()
    finally:
        # Release the connection even when the read fails part-way.
        response.close()

    try:
        page = unicode(page, 'utf-8')
    except UnicodeDecodeError:
        # Best effort: the page is not valid UTF-8, so hand the raw bytes
        # to the parser unchanged.
        pass

    root = lxml.html.parse(StringIO.StringIO(page)).getroot()
    root.make_links_absolute(url, resolve_base_href=True)
    return lxml.html.tostring(root)
4053# Log the errors ourselves; don't depend on Apache to log them for us.
4154 fil = FileHandler(os.path.join(os.path.dirname(__file__), 'logme'),mode='a')
4255 fil.setLevel(logging.ERROR)