-
Notifications
You must be signed in to change notification settings - Fork 0
/
cleverbot.py
174 lines (142 loc) · 5.82 KB
/
cleverbot.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
"""An unofficial library to access the Cleverbot API."""
from __future__ import (absolute_import, division, print_function,
unicode_literals)
from builtins import str # pylint: disable=redefined-builtin
from builtins import object # pylint: disable=redefined-builtin
import collections
import hashlib
import requests
from requests.compat import urlencode
from future.backports.html import parser
# Shared HTML parser instance, kept only so that `Cleverbot._parse` can call
# its `unescape` method on Cleverbot's raw reply text. `unescape` is an
# instance method, hence the throwaway instance (the original author notes it
# never uses `self` anyway and wishes it were a static method or function).
entity_parser = parser.HTMLParser()
class Cleverbot(object):
    """Handles a conversation with Cleverbot.

    Example usage:

        >>> from cleverbot import Cleverbot
        >>> cb = Cleverbot()
        >>> cb.ask("Hi. How are you?")
        "I'm good, thanks. How are you?"
    """

    HOST = "www.cleverbot.com"
    PROTOCOL = "http://"
    RESOURCE = "/webservicemin?uc=165&"
    API_URL = PROTOCOL + HOST + RESOURCE

    # Headers sent with every POST to the API (see `_send`).
    headers = {
        'User-Agent': 'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.0)',
        'Accept': 'text/html,application/xhtml+xml,'
                  'application/xml;q=0.9,*/*;q=0.8',
        'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
        'Accept-Language': 'en-us,en;q=0.8,en-us;q=0.5,en;q=0.3',
        'Cache-Control': 'no-cache',
        'Host': HOST,
        'Referer': PROTOCOL + HOST + '/',
        'Pragma': 'no-cache'
    }

    def __init__(self):
        """Set up the request payload, the conversation log and the session.

        Creating an instance performs a GET request against the Cleverbot
        main page so that the session obtains a cookie.
        """
        # The data that will get passed to Cleverbot's web API. An ordered
        # mapping is used because `_send` derives the 'icognocheck' token
        # from a fixed character range of the url-encoded payload.
        self.data = collections.OrderedDict(
            (
                # must be the first pairs
                ('stimulus', ''),
                ('cb_settings_language', ''),
                ('cb_settings_scripting', 'no'),
                ('islearning', 1),  # Never modified
                ('icognoid', 'wsf'),  # Never modified
                ('icognocheck', ''),

                ('start', 'y'),  # Never modified
                ('sessionid', ''),
                ('vText8', ''),
                ('vText7', ''),
                ('vText6', ''),
                ('vText5', ''),
                ('vText4', ''),
                ('vText3', ''),
                ('vText2', ''),
                ('fno', 0),  # Never modified
                ('prevref', ''),
                ('emotionaloutput', ''),  # Never modified
                ('emotionalhistory', ''),  # Never modified
                ('asbotname', ''),  # Never modified
                ('ttsvoice', ''),  # Never modified
                ('typing', ''),  # Never modified
                ('lineref', ''),
                ('sub', 'Say'),  # Never modified
                ('cleanslate', False),  # Never modified
            )
        )

        # the log of our conversation with Cleverbot
        self.conversation = []

        # get the main page to get a cookie (see bug #13)
        self.session = requests.Session()
        self.session.get(Cleverbot.PROTOCOL + Cleverbot.HOST)

    def ask(self, question):
        """Ask Cleverbot a question and return its answer.

        Maintains message history: the UTF-8 encoded question and answer are
        appended to ``self.conversation``, and the conversation id returned
        by the API is remembered in ``self.data['sessionid']`` so that later
        requests carry it.

        :param question: The question to ask (text)
        :return: Cleverbot's answer (text)
        :raises CleverbotAPIError: if the API replies with ``DENIED``
        """
        question = question.encode('utf-8')

        # Set the current question
        self.data['stimulus'] = question

        # Connect to Cleverbot's API and remember the response
        resp = self._send()

        # Add the current question to the conversation log
        self.conversation.append(question)

        parsed = self._parse(resp.content.decode('utf-8'))

        # Adopt the conversation id handed out by the server. The previous
        # check (`!= ''`) could never succeed because 'sessionid' starts out
        # empty and is assigned nowhere else, so the id was never stored.
        if not self.data['sessionid']:
            self.data['sessionid'] = parsed['conversation_id']

        # Add Cleverbot's reply to the conversation log
        self.conversation.append(parsed['answer'].encode('utf-8'))

        return parsed['answer']

    def _send(self):
        """POST the user's question and all required information to the
        Cleverbot API.

        Cleverbot tries to prevent unauthorized access to its API by
        obfuscating how it generates the 'icognocheck' token. The token is
        currently the md5 checksum of the 10th through 35th characters
        (``enc_data[9:35]``) of the url-encoded data. This may change in the
        future.

        ``self.data`` is an ordered mapping, so the pairs are encoded in the
        order in which they were declared in ``__init__``.

        :return: the response object returned by ``self.session.post``
        """
        # Copy the most recent conversation lines into vText2..vText8,
        # newest first; older lines are not sent.
        for slot, line in zip(range(2, 9), reversed(self.conversation)):
            self.data['vText' + str(slot)] = line

        # Generate the token
        enc_data = urlencode(self.data)
        digest_txt = enc_data[9:35]
        token = hashlib.md5(digest_txt.encode('utf-8')).hexdigest()
        self.data['icognocheck'] = token

        # POST the data to Cleverbot's API and return
        return self.session.post(Cleverbot.API_URL,
                                 data=self.data,
                                 headers=Cleverbot.headers)

    @staticmethod
    def _parse(resp_text):
        """Parse Cleverbot's raw response text into a dict.

        After HTML entities are unescaped, the text is split into records on
        runs of six carriage returns (the trailing chunk after the last
        separator is discarded) and each record is split into fields on a
        single carriage return.

        :param resp_text: decoded body of the API response
        :return: dict with the keys ``answer`` (first field of the first
            record), ``conversation_id`` (second field of the first record)
            and ``unknown`` (last field of the second record, or ``None``
            when there is no second record)
        :raises CleverbotAPIError: if the second field of the first record
            is ``DENIED``
        :raises IndexError: if the response holds no complete record with at
            least two fields
        """
        resp_text = entity_parser.unescape(resp_text)

        records = [
            chunk.split('\r')
            for chunk in resp_text.split('\r\r\r\r\r\r')[:-1]
        ]

        first = records[0]
        if first[1] == 'DENIED':
            raise CleverbotAPIError()

        parsed_dict = {
            'answer': first[0],
            'conversation_id': first[1],
        }
        try:
            parsed_dict['unknown'] = records[1][-1]
        except IndexError:
            parsed_dict['unknown'] = None
        return parsed_dict
class CleverbotAPIError(Exception):
    """Raised when Cleverbot's API rejects a request.

    A rejection most likely means the service recognized the client as a bot.
    """