Commit d09d88dac6305786882d0e6e12371c1b2fe51b1e

  • avatar
  • arvind
  • Thu Mar 28 15:18:58 IST 2013
Create a new API endpoint:
  - /SWeeText, which fetches a requested page for annotation.
  • Diff rendering mode:
  • inline
  • side by side

fetch.py

7from logging import FileHandler7from logging import FileHandler
8import pymongo8import pymongo
9import os9import os
10import lxml.html
11import urllib2
12import StringIO
1013
11app = Flask(__name__)14app = Flask(__name__)
1215
36 x = x + 136 x = x + 1
37 return jsonify(ret)37 return jsonify(ret)
3838
39
@app.route('/SWeeText', methods=['GET'])
def SWeeText():
    """Fetch the page named by the ``url`` query parameter for annotation.

    The page is downloaded with a browser-like User-Agent, parsed with
    lxml, and every link in it is rewritten to an absolute URL (resolved
    against the requested URL, honouring any ``<base href>``) so the
    markup still works when served from this application.

    Returns:
        The serialized HTML document on success. Otherwise a
        ``(message, status)`` tuple: 400 when ``url`` is missing or is not
        an http(s) URL, 502 when the remote page cannot be fetched or
        yields no parseable document.
    """
    url = request.args.get('url')
    if not url:
        # Previously this fell off the end of the view and returned None,
        # which Flask reports as an internal server error.
        return 'Missing required query parameter: url', 400

    # urllib2 also opens file:// and ftp:// URLs; restrict the endpoint to
    # web pages so it cannot be used to read local files.
    # NOTE(review): http(s) URLs pointing at internal hosts are still
    # fetched, and no timeout is set -- consider both if this is public.
    if not url.lower().startswith(('http://', 'https://')):
        return 'Only http and https URLs are supported', 400

    page_request = urllib2.Request(
        url,
        headers={'User-Agent': "Mozilla/5.0(X11; U; Linux i686) Gecko/20071127 Firefox/2.0.0.11"})
    try:
        remote = urllib2.urlopen(page_request)
    except urllib2.URLError:
        # Covers HTTPError too (it subclasses URLError). The URL is not
        # echoed back to avoid reflecting user input into the response.
        return 'Could not fetch the requested URL', 502

    # Always release the connection, even if reading fails part-way.
    try:
        page = remote.read()
    finally:
        remote.close()

    # Best effort: hand lxml unicode when the page is valid UTF-8,
    # otherwise let it work from the raw bytes.
    try:
        page = page.decode('utf-8')
    except UnicodeDecodeError:
        pass

    root = lxml.html.parse(StringIO.StringIO(page)).getroot()
    if root is None:
        # Guard against a document with no root element (e.g. empty body).
        return 'The requested URL did not return an HTML document', 502

    root.make_links_absolute(url, resolve_base_href=True)
    return lxml.html.tostring(root)
40#Log the errors, don't depend on apache to log it for you.53#Log the errors, don't depend on apache to log it for you.
41 fil = FileHandler(os.path.join(os.path.dirname(__file__), 'logme'),mode='a')54 fil = FileHandler(os.path.join(os.path.dirname(__file__), 'logme'),mode='a')
42 fil.setLevel(logging.ERROR)55 fil.setLevel(logging.ERROR)