Commit ca7f6147908dd2ca423532ca0c17ee35ce712703

  • avatar
  • admin
  • Wed Oct 05 21:18:51 IST 2011
Removing ~ files, added exclude in config
  
1<!ENTITY a11ypiSidebar.label "Re-narrate">
2<!ENTITY a11ypiSidebar.title "a11ypi">
  
1{"Languages":[
2 {"name": "Kannada",
3 "alt" : [],
4 "dialects" : [ {"name":"Mysore-Kannada", "alt": [] }, { "name":"Dharwad-Kannada", "alt" : [] }]
5 },
6 {"name": "Tamil",
7 "alt" : [],
8 "dialects" : []
9 },
10 {
11 "name" :"English",
12 "alt" : [],
13 "dialects" : [{"name":"Indian-English", "alt": [] }]
14 }
15 ],
16
17"Locations": [
18 {"name": "India",
19 "alt" : [],
20 "states":[
21 {
22 "name" :"Karnataka",
23 "alt" : [],
24 "towns": [ {"name":"Bangalore", "alt": []}, {"name":"Mysore", "alt":[]}, {"name":"Tumkur", "alt":[]} ]
25 },
26 {
27 "name" : "TamilNadu",
28 "alt" : [],
29 "towns" : [{"name":"Chennai", "alt":[]}]
30 }
31 ]
32 }
33 ],
34
35"Style" : [{"name":"Summary", "alt":[]},{ "name":"Abstract", "alt":[]},{ "name":"Simplification", "alt":[]}]
36
37}
38
  
1from cgi import parse_qs
2from itertools import izip
3import json
4import os
5import commands
6from pymongo import *
7from bson import *
8from gdata import service
9from urllib import unquote_plus
10import unicodedata
11
def application(environ, start_response):
    """WSGI entry point: return the latest re-narration of every xpath of a page.

    The request body is a form-encoded string that must carry ``url`` and
    ``lang``.  Matching documents of the ``alipi.post`` collection are grouped
    by (xpath, url) and the last document of each group is serialized as
    ``###&key::value&key::value...``.

    Returns:
        The serialized string, or ``'empty'`` when the request has no usable
        body, a required parameter is missing, nothing matches, or a field
        cannot be encoded.

    NOTE(review): the indentation of this script was lost in the listing it
    was restored from.  The float branch is assumed to pair with the outer
    type check (``_id`` is skipped, float values are stringified) -- confirm
    against the deployed copy.
    """
    start_response('200 OK', [('Content-type', 'text/plain'),
                              ('Access-Control-Allow-Origin', '*')])
    errors = environ['wsgi.errors']

    try:
        recieved = environ['wsgi.input'].read(int(environ['CONTENT_LENGTH']))
    except (KeyError, ValueError):
        # CONTENT_LENGTH may be absent *or* empty; both mean "no body".
        # Always answer with a string so the server never receives None.
        errors.write('empty\n')
        return 'empty'

    # Decode the form body; partition() tolerates pairs without '='.
    params = {}
    for pair in recieved.split('&'):
        name, _, value = pair.partition('=')
        params[unquote_plus(name)] = unquote_plus(value)
    if 'url' not in params or 'lang' not in params:
        errors.write('empty\n')
        return 'empty'

    collection = Connection('localhost', 27017)['alipi']['post']

    # All re-narrations of the same xpath are grouped together.
    groups = collection.group(
        key=Code('function(doc){return {"xpath" : doc.xpath, "url": doc.url}}'),
        condition={"url": params['url'], "lang": params['lang']},
        initial={'narration': []},
        reduce=Code('function(doc,out){out.narration.push(doc);}')
    )
    if not groups:
        errors.write('empty\n')
        return 'empty'

    pieces = []
    for group in groups:
        post = group['narration'][-1]  # the last re-narration that was stored
        pieces.append('###')
        for field, value in post.items():
            try:
                if isinstance(value, float):
                    pieces.append('&' + str(field) + '::' + str(value))
                elif field != '_id':
                    if isinstance(value, unicode):
                        value = value.encode('utf-8')
                    pieces.append('&' + str(field) + '::' + value)
            except TypeError:
                # Value is neither text nor float: log the field and skip it.
                errors.write('%s\n' % field)
            except UnicodeEncodeError:
                errors.write('%s\n' % field)
                errors.write('Error Encoding request string\n')
                return 'empty'

    return ''.join(pieces)
80
  
1import json
2from pymongo import *
3from bson.code import *
4from pickle import *
5
6#this file returns data.json; it can be extended later to return any other file that is needed
def application(environ, start_response):
    """WSGI entry point: serve the contents of ``/var/www/wsgi/data.json``.

    The request body is read (and discarded) first; a request without a usable
    CONTENT_LENGTH is answered with ``'empty'``.  Otherwise the JSON file is
    parsed and re-serialized, so the response is always well-formed JSON.

    Returns:
        ``'empty'`` for a request without a body, else the JSON text.

    Raises:
        IOError: if the data file cannot be opened.
        ValueError: if the data file does not contain valid JSON.
    """
    start_response('200 OK', [('Content-type', 'application/json'),
                              ('Access-Control-Allow-Origin', '*')])

    try:
        # The body itself is not used; it is only consumed.
        environ['wsgi.input'].read(int(environ['CONTENT_LENGTH']))
    except (KeyError, ValueError):
        # CONTENT_LENGTH may be absent or empty; both mean "no body".
        environ['wsgi.errors'].write('empty\n')
        return 'empty'

    # Context manager guarantees the handle is closed on every request.
    with open("/var/www/wsgi/data.json", "rb") as handle:
        data = handle.read()

    return json.dumps(json.loads(data))
27
28
  
1import gdata
2import atom
3from gdata import service
4from urllib import unquote_plus
5import commands
6import time
def application(environ, start_response):
    """WSGI entry point: publish a post on the caller's blog, then crawl it.

    The form-encoded request body must carry ``Email``, ``Passwd``, ``href``
    (public URL of the target blog), ``title``, ``content`` and ``lang``.
    The handler logs in to Blogger, looks up the blog whose HTML link equals
    ``href``, creates the post and finally runs the ``a11y.in`` scrapy spider
    on the URL of the new post.

    Returns:
        A one-element list with a status message.

    Raises:
        KeyError: if CONTENT_LENGTH or a required form field is missing.
    """
    # Local import: keeps the file's import block untouched.
    import subprocess

    start_response('200 OK', [('Content-type', 'text/plain')])

    recieved = environ['wsgi.input'].read(int(environ['CONTENT_LENGTH']))

    # Convert the body to a dictionary; partition() tolerates pairs lacking '='.
    parameter = {}
    for pair in recieved.split('&'):
        name, _, value = pair.partition('=')
        parameter[unquote_plus(name)] = unquote_plus(value)

    blogger_service = service.GDataService(parameter['Email'], parameter['Passwd'])
    blogger_service.source = 'Servelots-alipi-1.0'
    blogger_service.service = 'blogger'
    blogger_service.account_type = 'GOOGLE'
    blogger_service.server = 'www.blogger.com'
    blogger_service.ProgrammaticLogin()

    query = service.Query()
    query.feed = '/feeds/default/blogs'
    feed = blogger_service.Get(query.ToUri())

    blog_entry = None
    for entry in feed.entry:
        if parameter['href'] == entry.GetHtmlLink().href:
            blog_id = entry.GetSelfLink().href.split("/")[-1]
            blog_entry = CreatePublicPost(
                blogger_service, blog_id,
                title=parameter['title'],
                content=parameter['content'] + '&amp;lang=' + parameter['lang'])

    if blog_entry is None:
        # Previously this fell through to a NameError on the crawl command.
        return ["Blog not found"]

    # NOTE(review): presumably gives Blogger time to publish the post before
    # it is crawled -- confirm whether the delay is still needed.
    time.sleep(10)

    # Argument list + cwd instead of a shell string: the URL is never
    # interpreted by a shell.  Output is captured and discarded, as before.
    crawler = subprocess.Popen(
        ['scrapy', 'crawl', '--spider', 'a11y.in', blog_entry.GetHtmlLink().href],
        cwd='/home/alipi-crawler/a11ypi',
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT)
    crawler.communicate()

    return ["Blog successfuly posted!!"]
35
def CreatePublicPost(blogger_service, blog_id, title, content):
    """Create an entry with the given title and HTML content and post it.

    The entry is sent through ``blogger_service.Post`` to the default posts
    feed of ``blog_id``; whatever that call returns is handed back.
    """
    new_post = gdata.GDataEntry()
    new_post.title = atom.Title('xhtml', title)
    new_post.content = atom.Content(content_type='html', text=content)
    posts_feed = '/feeds/%s/posts/default' % blog_id
    return blogger_service.Post(new_post, posts_feed)
  
1import json
2from pymongo import *
3from bson.code import *
def application(environ, start_response):
    """WSGI entry point: list the languages a URL has been re-narrated into.

    The raw request body is used, as is, as the URL to look up in the
    ``alipi.post`` collection.

    Returns:
        A JSON array of the distinct ``lang`` values stored for that URL, or
        the string ``'empty'`` when the request has no body or nothing matches.
    """
    start_response('200 OK', [('Content-type', 'text/plain'),
                              ('Access-Control-Allow-Origin', '*')])

    try:
        recieved = environ['wsgi.input'].read(int(environ['CONTENT_LENGTH']))
    except (KeyError, ValueError):
        # CONTENT_LENGTH may be absent or an empty string; both mean "no body".
        return 'empty'

    collection = Connection('localhost', 27017)['alipi']['post']

    # Collect every distinct language stored for the received url.
    langForUrl = collection.group(
        key=Code('function(doc){return {"url" : doc.url}}'),
        condition={"url": recieved},
        initial={'lang': []},
        reduce=Code('function(doc, out){if (out.lang.indexOf(doc.lang) == -1) out.lang.push(doc.lang)}')
    )

    if langForUrl:
        return json.dumps(langForUrl[0]['lang'])
    return "empty"
36
37
  
1from lxml.html import *
2from cgi import parse_qs
3from itertools import izip
4import json
5import os
6import commands
7from pymongo import *
8from bson import *
9from gdata import service
10from urllib import unquote_plus
11
def application(environ, start_response):
    """WSGI entry point: return every re-narration stored for one page element.

    The form-encoded request body must carry ``url`` and ``xpath``.  Matching
    documents of the ``alipi.post`` collection are grouped by ``ren_id`` and
    the first document of each group is serialized as
    ``###&key::value&key::value...``.

    Returns:
        The serialized string; ``''`` when nothing matches; ``'empty'`` when
        the request has no usable body, a required parameter is missing, or a
        field cannot be encoded.

    NOTE(review): the indentation of this script was lost in the listing it
    was restored from.  The float branch is assumed to pair with the outer
    type check (``_id`` is skipped, float values are stringified) -- confirm
    against the deployed copy.
    """
    start_response('200 OK', [('Content-type', 'text/plain'),
                              ('Access-Control-Allow-Origin', '*')])
    errors = environ['wsgi.errors']

    try:
        recieved = environ['wsgi.input'].read(int(environ['CONTENT_LENGTH']))
    except (KeyError, ValueError):
        # Previously this path fell off the end of the function and handed
        # None to the server; answer like the sibling handlers do instead.
        return 'empty'

    # Decode the form body; partition() tolerates pairs without '='.
    params = {}
    for pair in recieved.split('&'):
        name, _, value = pair.partition('=')
        params[unquote_plus(name)] = unquote_plus(value)
    if 'url' not in params or 'xpath' not in params:
        return 'empty'

    collection = Connection('localhost', 27017)['alipi']['post']

    # One group per re-narration session (ren_id) for this url/xpath.
    groups = collection.group(
        key=Code('function(doc){return {"ren_id" : doc.ren_id}}'),
        condition={"url": params['url'], "xpath": params['xpath']},
        initial={'narration': []},
        reduce=Code('function(doc,out){out.narration.push(doc);}')
    )
    errors.write('%s\n' % (groups,))

    if not groups:
        return ''

    pieces = []
    for group in groups:
        post = group['narration'][0]
        pieces.append('###')
        for field, value in post.items():
            try:
                if isinstance(value, float):
                    pieces.append('&' + str(field) + '::' + str(value))
                elif field != '_id':
                    if isinstance(value, unicode):
                        value = value.encode('utf-8')
                    pieces.append('&' + str(field) + '::' + value)
            except TypeError:
                # Value is neither text nor float: log the field and skip it.
                errors.write('%s\n' % field)
            except UnicodeEncodeError:
                errors.write('Error Encoding request string\n')
                return 'empty'

    return ''.join(pieces)
81
  
1from lxml.html import *
2from cgi import parse_qs
3from itertools import izip
4import json
5import os
6import commands
7from pymongo import *
8from bson import *
9from gdata import service
10from urllib import unquote_plus
11import unicodedata
12
def application(environ, start_response):
    """WSGI entry point: return the first re-narration of every xpath of a page.

    The form-encoded request body must carry ``url`` and ``lang``.  Matching
    documents of the ``alipi.post`` collection are grouped by (xpath, url) and
    the first document of each group is serialized as
    ``###&key::value&key::value...``.

    Returns:
        The serialized string, or ``'empty'`` when the request has no usable
        body, a required parameter is missing, nothing matches, or a field
        cannot be encoded.

    NOTE(review): the indentation of this script was lost in the listing it
    was restored from.  The float branch is assumed to pair with the outer
    type check (``_id`` is skipped, float values are stringified) -- confirm
    against the deployed copy.
    """
    start_response('200 OK', [('Content-type', 'text/plain'),
                              ('Access-Control-Allow-Origin', '*')])
    errors = environ['wsgi.errors']

    try:
        recieved = environ['wsgi.input'].read(int(environ['CONTENT_LENGTH']))
    except (KeyError, ValueError):
        # CONTENT_LENGTH may be absent *or* empty; both mean "no body".
        # Always answer with a string so the server never receives None.
        errors.write('empty\n')
        return 'empty'

    # Decode the form body; partition() tolerates pairs without '='.
    params = {}
    for pair in recieved.split('&'):
        name, _, value = pair.partition('=')
        params[unquote_plus(name)] = unquote_plus(value)
    if 'url' not in params or 'lang' not in params:
        errors.write('empty\n')
        return 'empty'

    collection = Connection('localhost', 27017)['alipi']['post']

    # All re-narrations of the same xpath are grouped together.
    groups = collection.group(
        key=Code('function(doc){return {"xpath" : doc.xpath, "url": doc.url}}'),
        condition={"url": params['url'], "lang": params['lang']},
        initial={'narration': []},
        reduce=Code('function(doc,out){out.narration.push(doc);}')
    )
    if not groups:
        errors.write('empty\n')
        return 'empty'

    pieces = []
    for group in groups:
        # For now only the first re-narration is used; filters come later.
        post = group['narration'][0]
        pieces.append('###')
        for field, value in post.items():
            try:
                if isinstance(value, float):
                    pieces.append('&' + str(field) + '::' + str(value))
                elif field != '_id':
                    if isinstance(value, unicode):
                        value = value.encode('utf-8')
                    pieces.append('&' + str(field) + '::' + value)
            except TypeError:
                # Value is neither text nor float: log the field and skip it.
                errors.write('%s\n' % field)
            except UnicodeEncodeError:
                errors.write('%s\n' % field)
                errors.write('Error Encoding request string\n')
                return 'empty'

    return ''.join(pieces)
81
  
1import gdata
2import atom
3from gdata import service
4import json
5from pymongo import *
6from bson.code import *
7#from gdata import service
8from urllib import unquote_plus
9#import commands
10#import time
11import random
12
def application(environ, start_response):
    """WSGI entry point: store a re-narration and publish it as a blog post.

    The request body is a sequence of form-encoded segments separated by
    ``###``, one per re-narrated element.  Every segment must carry ``url``,
    ``xpath``, ``location``, ``lang``, ``author``, ``style``, ``elementType``
    and ``data``.  All segments of one request share a random ``ren_id``.
    The segments are rendered as ``<p>`` elements, posted to the
    alipi-workshop blog, and then inserted into the ``alipi.post`` collection.

    Returns:
        ``'ok'`` on success, ``'empty'`` when the request carries no body or
        no non-empty segment.

    Raises:
        KeyError: if a segment lacks one of the required fields.
    """
    start_response('200 OK', [('Content-type', 'text/plain'),
                              ('Access-Control-Allow-Origin', '*')])
    errors = environ['wsgi.errors']

    try:
        recieved = environ['wsgi.input'].read(int(environ['CONTENT_LENGTH']))
    except (KeyError, ValueError):
        # CONTENT_LENGTH may be absent or empty; both mean "no body".
        errors.write('empty\n')
        return 'empty'
    errors.write('%s\n' % recieved)

    # Local import: keeps the file's import block untouched.
    from cgi import escape

    # Connect to the DB.
    MONGODB_SERVER = 'localhost'
    MONGODB_PORT = 27017
    MONGODB_DB = 'alipi'
    MONGODB_COLLECTION = 'post'

    connection = Connection(MONGODB_SERVER, MONGODB_PORT)
    collection = connection[MONGODB_DB][MONGODB_COLLECTION]
    collection.create_index("url")
    ren_id = random.random()  # all elements from the same ren have the same id

    # Attribute name in the generated <p> tag -> field of the segment.
    attribute_fields = (
        ('about', 'url'),
        ('xpath', 'xpath'),
        ('location', 'location'),
        ('lang', 'lang'),
        ('author', 'author'),
        ('style', 'style'),
        ('elementType', 'elementType'),
    )

    html_parts = []
    dicts = []
    # One segment per elementary re-narration (e.g. a paragraph).
    for segment in recieved.split('###'):
        if not segment:
            continue  # leading/trailing/doubled separators carry no data
        d = {}
        for pair in segment.split('&'):
            if not pair:
                continue
            name, _, value = pair.partition('=')
            d[unquote_plus(name)] = unquote_plus(value)
        d['ren_id'] = ren_id

        # Attribute values come straight from the request: escape them so a
        # quote or '&' cannot break out of the attribute.
        attributes = ''.join(
            '%s="%s" ' % (attr, escape(d[field], True))
            for attr, field in attribute_fields)
        # d['data'] is inserted verbatim.
        # NOTE(review): presumably it is the re-narrated markup itself -- confirm.
        html_parts.append('<p ' + attributes + '>' + d['data'] + '</p>')
        dicts.append(d)

    if not dicts:
        errors.write('empty\n')
        return 'empty'

    # The attribution line uses the values of the last segment.
    last = dicts[-1]
    attribution = ('<blockquote><p>Re-narration by ' + last['author'] +
                   ' in ' + last['lang'] +
                   ' targeting ' + last['location'] +
                   ' for this web <a href="' + escape(last['url'], True) +
                   '">page</a></p></blockquote>')

    # NOTE(review): credentials are hard-coded in source control; they should
    # be moved to configuration and rotated.
    blogger_service = service.GDataService("allipi123@gmail.com", "allipi3354")
    blogger_service.source = 'Servelots-alipi-1.0'
    blogger_service.service = 'blogger'
    blogger_service.account_type = 'GOOGLE'
    blogger_service.server = 'www.blogger.com'
    blogger_service.ProgrammaticLogin()

    query = service.Query()
    query.feed = '/feeds/default/blogs'
    feed = blogger_service.Get(query.ToUri())
    for entry in feed.entry:
        if "http://alipi-workshop.blogspot.com/" == entry.GetHtmlLink().href:
            blog_id = entry.GetSelfLink().href.split("/")[-1]
            CreatePublicPost(blogger_service, blog_id,
                             title="Re-narration",
                             content=''.join(html_parts) + attribution)

    for d in dicts:
        collection.insert(d)

    return 'ok'
103
def CreatePublicPost(blogger_service, blog_id, title, content):
    """Post a new entry (title plus HTML content) to the blog ``blog_id``.

    Returns whatever ``blogger_service.Post`` returns for the default posts
    feed of that blog.
    """
    draft = gdata.GDataEntry()
    draft.title = atom.Title('xhtml', title)
    draft.content = atom.Content(content_type='html', text=content)
    target_feed = '/feeds/%s/posts/default' % blog_id
    return blogger_service.Post(draft, target_feed)