1 """
2 Bachelor AI project 2009. Andreas van Cranenburgh 0440949.
3 Two-word stage dialogue simulator using a corpus of exemplars.
4
5 Interactive usage:
6 $ python model.py
7
8 Other uses:
9 - Re-enact dialogue in 01.cha (should be a text file with one utterance
10 per line, with each line preceded by *MOT or *CHI, as in the CHILDES
11 corpus):
12
13 $ python
14 # >>> import model
15 # >>> model.evaluate(open('01.cha').read())
16 ...
17
18 - Talk to self for 25 utterances:
19
20 # >>> model.selftalk(25)
21 ...
22
23 - Demonstrate some sample utterances that work (but not from corpus):
24
25 # >>> model.test()
26 ...
27 """
28
29
30
31
32
33
34
35
36
37
38
39
40
41 try:
42 from free_will import choice
43 except ImportError:
44 from random import choice
45 from string import uppercase, lowercase
46 from collections import defaultdict
47 from pprint import pprint
48
49 import math
50
52 """ Interactive textual user interface. """
53 print "Child dialogue simulator.",
54 print "Enter parent utterance (or `quit' to quit)."
55 print
56 print "Utterances in corpus: "
57 print getexemplars().keys()
58 print
59 lexicon = inferlexicon(getexemplars(), True)
60 print 'Lexicon (distilled from corpus): ', lexicon
61
62 d = dialogue()
63 d.send(None)
64 while True:
65 print "Parent: ",
66 utt = raw_input()
67 if utt == 'quit':
68 break
69 reply = d.send(utt)
70 print "Child: ", reply
71
def dbg(*m):
	""" Print debug output (also see nodbg).
	Takes a variable number of arguments of any type.

	>>> dbg('choices', range(3))
	choices [0, 1, 2]
	"""
	# expandtabs keeps aligned debug columns readable
	print(' '.join(str(a).expandtabs() for a in m))


def nodbg(*m):
	""" Silently discard debug output; drop-in replacement for dbg.
	(Referenced throughout the module; its definition was lost from
	the original listing and is restored here.) """
	pass
81
def dialogue(exemplars={}, constructions={}, lexicon={}, debug=dbg):
	""" Co-routine for dialogue. Use send(utterance) to receive a reply.
	Initial input: exemplars
	Further input: one utterance at a time
	Output: one reply at a time"""
	# NOTE: the mutable default arguments double as caches shared across
	# calls; kept as-is for backward compatibility.
	if not exemplars:
		exemplars = getexemplars()
	if not lexicon:
		lexicon = inferlexicon(exemplars, False)
	if not constructions:
		constructions = inferconstructions(exemplars, lexicon)
	discourse, topic, reactionutt = [('', '')], '', ''
	while True:
		utt = yield reactionutt
		newmeaning = interpret(utt, topic, exemplars, lexicon)
		# Reinforcement refers to the previous exchange, so it requires
		# at least one earlier utterance in the discourse.
		if 'reinforcement' in newmeaning and len(discourse) > 1:
			reinforce(meaning, reaction, reactionutt, discourse, exemplars)
		meaning = newmeaning
		debug(' interpretation:', meaning)

		# Disabled: learning novel utterances into the exemplar corpus.
		if False and utt not in exemplars and meaning:
			if meaning == lexiconcheck(utt, meaning, lexicon):
				debug('added utterance to exemplars')
				exemplars[utt] = meaning
				constructions = inferconstructions(exemplars, lexicon)
			else:
				debug('generation check failed, not added to exemplars')

		reaction = response(meaning, exemplars)
		debug(' reaction:', reaction)
		reactionutt = express2(reaction, exemplars, constructions, lexicon)
		discourse.append((utt, meaning))
		discourse.append((reactionutt, reaction))

		# Pick a new topic when the old one disappeared from the
		# conversation or was never established.
		if (topic not in meaning and topic) or not topic:
			topic = findtopic(discourse) or ''
120
122 """ Talk to oneself, picking a random utterance from the exemplars when
123 repetition is detected.
124 Input: number of utterances to generate, exemplars
125 Output: transcript of conversation
126 """
127 if not e:
128 e = getexemplars()
129 a = choice(e.keys())
130 speaking, listening = '*MOT', '*CHI'
131 said = set([a])
132 c = []
133
134 d = dialogue(e)
135 d.send(None)
136
137 for x in range(u):
138 c.append('%s: %s' % (speaking, a))
139 debug(c[-1])
140 speaking, listening = listening, speaking
141 r = d.send(a)
142 if a == r and speaking == '*MOT':
143 debug('picking random utterance')
144
145
146
147 a = choice(list(set(e.keys()) - said))
148 said.add(a)
149 else:
150 a = r
151 print '\n', '\n'.join(c)
152
154 """ When presented with a corpus fragment, feed parent utterances to model
155 and generate model responses in place of child utterances.
156 n = number of iterations to perform
157 Input: string with transcript of corpus
158 Output: same transcript with replies from model instead of child
159 """
160
161
162 if not exemplars:
163 exemplars = getexemplars()
164
165 d = dialogue(exemplars)
166 d.send(None)
167 for a in range(n):
168 modelconv = []
169 for b in conv.split('\n'):
170 try:
171 who, utt = b.split(':', 1)
172 except ValueError:
173
174 continue
175 if 'MOT' in who:
176 print b
177 ans = '*CHI: ' + d.send(utt)
178 print ans
179 for x in (b, ans):
180 modelconv.append(x)
181 print
182 return '\n'.join((str(a) for a in modelconv))
183
184 -def reinforce(meaning, reaction, reactionutt, discourse, exemplars):
185 """ Strengthen connection between last two utterances by adding
186 a random identifier and adding or updating the resulting exemplar.
187 Input: two utterances with meanings
188 Side-effect: updated or added exemplars
189 """
190 randomid = "".join(choice(lowercase) for a in range(5))
191 if discourse[-1][0] in exemplars:
192 print 'updated',
193 else:
194 print 'added',
195 exemplars[discourse[-1][0]] = meaning + ' ' + randomid
196 print 'reinforcement:', exemplars[discourse[-1][0]]
197 if reactionutt in exemplars:
198 print 'updated',
199 else:
200 print 'added',
201 print 'with',
202 exemplars[reactionutt] = reaction + ' ' + randomid
203 print exemplars[reactionutt]
204
def findtopic(discourse, debug=dbg):
	""" Look for a recurring element in the last two utterances,
	if found, this becomes the new topic.
	Input: discourse (list of utterances and meanings)
	Output: clause, or None
	"""
	def minprefertwo(seq):
		""" Minimum of (position, clause) pairs, but prefer position 2. """
		seq = list(seq)
		try:
			# next() builtin instead of the generator's .next() method
			return next((a, b) for a, b in seq if a == 2)
		except StopIteration:
			return min(seq)

	m1, m2 = discourse[-1][1].split(), discourse[-2][1].split()
	# Skip the speech-act operator (first token) of the newest meaning.
	common = intersect(m1[1:], m2)
	if common:
		topic = minprefertwo(
			(minprefertwo(
				((m1.index(a), a),
				(m2.index(a), a))) for a in common))[1]
		debug(' topic:', topic)
		return topic
231
def interpret(utterance, topic, exemplars, lexicon, debug=dbg):
	""" Return semantic representation for linguistic utterance.
	This function is in charge of backtracking over initial exemplars
	and picking the best result, the rest is done by interpretwith()
	Input: utterance
	Output: meaning representation
	"""
	def match(a, b):
		""" Number of words shared between word list a and utterance b. """
		return len(intersect(a, b.split()))

	# Normalize demonstrative markers: '[= x]' -> '[=x]'
	utterance = utterance.replace('[= ', '[=')
	words = utterance.split()
	initial = [(match(words, a), a, b) for a, b in exemplars.items()]

	# Keep only the exemplars sharing the maximal number of words.
	n = max(initial)[0]
	if n:
		initial = [a[1:] for a in initial if a[0] == n]
	else:
		# No exemplar shares even a single word with the utterance.
		return ''

	i = []
	for exutt, exmeaning in initial:
		# Interpret the words not covered by this initial exemplar.
		w = [b for b in words if b not in exutt]
		n, meaning = interpretwith(w, exmeaning, exemplars, lexicon, nodbg)
		if meaning:
			# Rank candidates by: number of additional exemplars used,
			# distance between the meaning and its lexicon-filtered form,
			# and inverse meaning length (prefer richer meanings).
			i.append((
				n,
				edit_dist(lexiconcheck(utterance, meaning, lexicon), meaning),
				1.0 / len(tokens(meaning)),
				(exutt, exmeaning)))

	if i:
		# Best ranked (utterance, meaning) pair.
		a = min(i)[-1]
		debug('initial exemplar:')
		debug('\t', a)

		# Try to make the exemplar's meaning compatible with the topic.
		# NOTE(review): original referenced `b' before assignment here
		# (NameError); the exemplar meaning a[1] is the evident intent.
		c = conciliate(topic, a[1], varclauses([a[1]]))
		if c:
			b = c
		else:
			b = a[1]

		# Interpret whatever words the initial exemplar did not cover.
		w = [c for c in words if c not in a[0].split()]
		debug('words left:', repr(' '.join(w)))
		if w:
			return interpretwith(w, b, exemplars, lexicon, debug)[1]
		return b
	else:
		return ''
305
307
308
309
310 def elim(a):
311 return a.replace('[=%s]' % a[2:-1], a[2:-1]).strip().lower()
312 revlex = revdict(lex)
313
314 uttlex = " ".join(lex[elim(b)] for b in utt.split() if elim(b) in lex)
315
316
317
318
319 def check(a):
320 return a not in revlex or a in uttlex or a[-1] == ':'
321 return " ".join((a for a in meaning.split() if check(a)))
322
323
def interpretwith(words, partialmeaning, exemplars, lexicon, debug=dbg):
	""" Interpretation helper function called by interpret(), work out meaning
	of remaining words by stitching together the best matching fragments.
	Input: remaining words, partial meaning
	Output: (number of exemplar fragments used, meaning) """
	# Skip a word entirely unknown to the corpus (demonstratives excepted).
	if words and words[0][0] != '[':
		if words[0] not in ' '.join(exemplars.keys()).split():
			debug("skipping", repr(words[0]))
			words = words[1:]

	# Dereference a demonstrative marker such as '[=ball]'.
	if partialmeaning and [w for w in words if '[' in w]:
		word = [w for w in words if '[' in w][0][2:-1].strip().lower()
		if word in lexicon:
			pm = conciliate(lexicon[word], partialmeaning, None, debug)
			if pm:
				partialmeaning = pm
				debug("demonstrative dereferenced:", partialmeaning)
			else:
				# Demonstrative incompatible with the partial meaning.
				return 0, ''
				# Unreachable remnant of an earlier strategy, kept as-is:
				partialmeaning[0] += ' ' + lexicon[word]
				debug("appended demonstrative:", partialmeaning)
		words = [a for a in words if a[0] != '[']

	for sub in substr_iter(words):
		substring = " ".join(sub)
		for b in exemplars:
			# Match whole words, not substrings of words.
			if substring in b and set(sub).issubset(b.split()):
				pm = partialmeaning
				# Clauses which may be substituted during conciliation.
				subst = [lexicon[word] for word in sub if word in lexicon]
				pm = conciliate(exemplars[b], partialmeaning, subst + [''], debug)
				if pm:
					matches = [c for c in words if c in sub]
					words = [c for c in words if c not in sub]
					debug(' ', repr(' '.join(matches)), 'in', repr(b))
					debug(' and', repr(exemplars[b]))
					debug(' matches', repr(pm))
					# Recurse on the remaining words.
					m, p = interpretwith(words, pm, exemplars, lexicon, debug)
					return 1 + m, p

	if words:
		return 0, ''
	return 0, partialmeaning
379
def response(meaning, exemplars, debug=dbg):
	""" Transform a meaning into a response using adjacency pairs of speech
	acts.
	Input: meaning representation
	Output: meaning representation
	"""
	def countvars(m):
		""" Number of variable tokens in meaning m. """
		return len([a for a in tokens(m) if var(a)])
	if 'imperative' in meaning:
		# Comply verbally with a command.
		return 'acknowledgement ' + meaning.split(':')[1]
	elif 'ynquestion' in meaning:
		op = choice(('agreement', 'denial'))
		return op
	elif 'whquestion' in meaning:
		if not ':' in meaning:
			return ''
		m = 'assertion:' + meaning.split(':', 1)[1]

		# Rank exemplar meanings by distance to the sought assertion,
		# penalizing meanings with open variables.
		def score(m, a):
			return edit_dist(tokens(m), tokens(a)) + 0.5 * countvars(a)
		d = [(score(m, a), a) for a in exemplars.values()]

		if min(d)[0] < 1:
			# Choose among all sufficiently close meanings.
			choices = [a[1] for a in d if a[0] < 1]
			debug("possible reactions:", sorted(choices))
			if choices:
				r = choice(choices)
			else:
				return ''
			m = conciliate(r, m, varclauses(m))
			if m:
				n = unifies(r, m)
				if n:
					if len(choices) > 1:
						debug('choice:', m)
					return n
				if len(choices) > 1:
					debug('choice:', m)
				return m
			else:
				return m

	elif 'assertion' in meaning:
		# Mirror assertions back.
		return meaning

	# Fall-through: no applicable adjacency pair.
	return ''
435
def express(meaning, exemplars, lexicon):
	""" Express `meaning' by returning the most similar exemplar. """
	if meaning in exemplars.values():
		return choice(revdict(exemplars)[meaning])
	else:
		# No exact match: fall back to the utterance whose meaning is
		# closest by edit distance.
		def dist(m, a):
			return edit_dist(m.split(), a.split())
		d = [(dist(meaning, b), a) for a, b in exemplars.items()]
		# hoist the invariant minimum out of the comprehension
		best = min(d)[0]
		if best < len(meaning):
			return choice([a for a in d if a[0] == best])[1]
		else:
			return '0'
450
def expressmulti(meaning, exemplars, constructions, lexicon):
	""" Express `meaning' by returning a matching exemplar or construction """
	def check(a):
		""" Keep only words occurring in some known construction. """
		return a in ' '.join(constructions.keys())
	# NOTE(review): this tests `meaning' against the exemplar keys
	# (utterances), not their meanings — confirm against callers.
	if meaning in exemplars:
		return ' '.join(a for a in exemplars[meaning] if check(a))

	if meaning in constructions.values():
		return choice(revdict(constructions)[meaning])

	if any(meaning in clauses for clauses in constructions.values()):
		return choice([utt for utt, clauses in constructions.items()
				if meaning in clauses])
	if any(clauses in meaning for clauses in constructions.values()):
		return choice([utt for utt, clauses in constructions.items()
				if clauses in meaning])

	return express2(meaning, exemplars, constructions, lexicon)
470
def express2(meaning, exemplars, constructions, lexicon, debug=dbg):
	""" Transform a meaning into a two word utterance using exemplars,
	constructions or the lexicon. Filter result according to lexical
	knowledge.
	Input: meaning representation
	Output: one or two word utterance
	"""
	def trim(utt):
		""" Drop words the lexicon does not know.
		(Renamed from `reduce' to stop shadowing the builtin.) """
		return ' '.join(a for a in utt.split() if a.lower() in lexicon)

	# Literal match against exemplar meanings.
	if meaning in exemplars.values():
		utt = choice(revdict(exemplars)[meaning])
		debug('reduced:', utt)
		return trim(utt)

	# Literal match against construction meanings.
	if meaning in constructions.values():
		utt = choice(revdict(constructions)[meaning])
		debug('reduced:', utt)
		return trim(utt)

	# Partial match: the meaning is contained in a construction's clauses.
	if any(meaning in clauses for clauses in constructions.values()):
		utt = choice([utt for utt, clauses in constructions.items()
				if meaning in clauses])
		debug('reduced:', utt)
		return trim(utt)

	# Otherwise compose an utterance from the lexicon.
	revlex = revdict(lexicon)
	if ':' in meaning:
		meaning = meaning.split()
	else:
		# A bare clause: express it directly if the lexicon knows it.
		if meaning in revlex:
			return revlex[meaning]
		else:
			return ''
	# Refuse to express a clause with a variable argument.
	if not arg(meaning[1]) or arg(meaning[1])[1] not in uppercase:
		clause = meaning[1]
	else:
		return ''
	debug("trying to express:", clause)

	# Express a whole clause with a single known word if possible.
	for a in meaning[1:]:
		if a in revlex:
			return choice(revlex[a])
	if clause in revlex:
		return revlex[clause].pop()

	# Compose a (topic, comment) two-word utterance.
	topic = arg(meaning[1])
	if topic and topic[0] in uppercase:
		if len(meaning) > 2:
			topic = pred(meaning[2])
		else:
			topic = meaning[0]
	comment = pred(meaning[1])

	candidates = [a for a, b in lexicon.items() if comment in b]
	if candidates:
		utt = choice(candidates)
		debug('comment:', comment, 'in', utt)
	else:
		utt = ''

	# Randomly produce a one-word utterance.
	if choice([0, 1]):
		return utt

	candidates = [a for a, b in lexicon.items() if topic in b and utt not in a]
	if candidates:
		a = choice(candidates)
		utt = a + ' ' + utt
		debug('topic:', topic, 'in', utt)
	else:
		debug('topic not expressible?', topic)
	return utt
556
def inferconstructions(exemplars, lexicon, constructions={}, debug=dbg):
	""" Build corpus of constructions from exemplars and lexicon.
	Input: exemplars & lexicon.
	Output: constructions, dictionary of pairings between substrings
	and clauses. """
	debug('distilling constructions')

	scores = defaultdict(int)
	n = len(constructions)
	for utt, meaning in exemplars.items():
		for a in substr_iter(utt.split()):
			# substr_iter yields longest substrings first; unigrams
			# cannot be constructions, so stop there.
			if len(a) == 1:
				break
			# Consider only multi-word, purely alphabetic substrings.
			words = ' '.join(b.lower() for b in a
					if all(c.lower() in lowercase for c in b))
			if ' ' not in words:
				continue
			# Score new candidates by corpus frequency.
			if not any(words in b for b in constructions):
				score = len([c for c in exemplars if words in c])
				if score:
					scores[words] = score

	for score, form in sorted(revdict(scores).items()):
		form = form[0]
		if form not in scores:
			# Already discarded as a substring of a longer form.
			continue
		# list() so we can pop from scores while iterating.
		for a in list(scores.keys()):
			if a in form and form != a:
				scores.pop(a)

		meaning = findmeaning(form, exemplars, lexicon, dbg)
		if meaning:
			debug('construction (score %d)' % score, repr(form), repr(meaning))
			constructions[form] = meaning
		else:
			ms = [meaning for utt, meaning in exemplars.items() if form in utt]
			score = '(score %d)' % score
			# was a tuple instead of a formatted string
			candidate = 'not found in candidates %s' % str(ms)
			debug('meaning of construction', repr(form), score, candidate)

	# Repeat until no new constructions are found.
	if n == len(constructions):
		return constructions
	return inferconstructions(exemplars, lexicon, constructions, debug)
613
def findmeaning(form, exemplars, lexicon, debug=dbg):
	""" Given the words in a construction, find the most common meaning
	associated with it in the corpus of exemplars. """
	ms = [meaning for utt, meaning in exemplars.items() if form in utt]

	revlex = revdict(lexicon)
	if len(ms) > 1:
		# Intersect the clause sets of all meanings containing the form.
		m = [a.split() for a in ms]
		m = reduce(intersect, m[1:], m[0])
		if m:
			def abstract(a):
				""" Abstract over arguments not expressed by the form. """
				if a in revlex and any(b in form for b in revlex[a]):
					return a
				elif a[0] == '(':
					return '(X)'
				return a
			return ' '.join(abstract(a) for a in m)
		else:
			# No common material; fall back to most frequent meaning.
			return max((ms.count(a), a) for a in ms)[1]
	# (An unreachable substring-scoring fallback that followed here in the
	# original — and would have raised TypeError — has been removed.)
	if ms:
		return ms.pop()
657
def inferlexicon(exemplars, verbose=False, lexicon={}, debug=dbg):
	""" Infer lexicon from corpus of exemplars.
	Input: exemplars.
	Output: lexicon, dictionary of word-clause pairings. """
	exlcase = dict((a.lower(), b) for a, b in exemplars.items())
	# Sentinel to detect growth; a separate name so the loop below
	# cannot clobber it (the original reused `n' for both).
	size = len(lexicon)

	# Harvest explicit pairings marked 'clause[n]', where n indexes the
	# word in the utterance expressing the clause; strip the marker.
	for utt, m in exemplars.items():
		if '[' in m:
			n = int(m[m.index('[') + 1:m.index(']')])
			clause = m[:m.index('[')].split()[-1]
			clause += m[m.index(']') + 1:].split()[0]
			word = utt.split()[n].lower()
			exemplars[utt] = m[:m.index('[')] + m[m.index(']') + 1:]
			if word not in lexicon:
				lexicon[word] = clause
	# Infer remaining words from the intersection of the meanings of
	# every exemplar they occur in.
	for word in set(" ".join(exemplars.keys()).split()) - set(lexicon.keys()):
		word = word.lower()
		utterances = [a.split() for a in exlcase.keys() if word in a.split()]
		meanings = [b.split() for a, b in exlcase.items() if word in a.split()]

		for a in list(reduce(intersect, meanings, meanings[0])):
			if word in a:
				if len(a) == 1:
					lexicon[word] = a
				else:
					# Prefer the least frequent (most specific) clause.
					def freq(b):
						return ' '.join(exemplars.values()).count(pred(b))
					candidates = ((freq(b), b) for b in a.split())
					lexicon[word] = min(candidates)[1]
				break

		# Disabled: inference by elimination.
		if False and word not in lexicon:
			a = reduce(intersect, utterances, utterances[0])
			if len(a - set(lexicon.keys())) == 1:
				lexicon[word] = reduce(intersect, meanings, meanings[0])
				if len(lexicon[word]) == 0:
					lexicon[word] = ''
				else:
					lexicon[word] = lexicon[word].pop()

		if len(utterances) == len(meanings) == len(utterances[0]):
			if len(utterances) == len(meanings[0]) == 1:
				lexicon[word] = meanings[0][0]
	if verbose:
		notfound = set(' '.join(exlcase.keys()).split()) - set(lexicon.keys())
		debug('lexicon: not found:', notfound, '\n')

	# Repeat while new entries are still being found (the original
	# recursed when the lexicon had NOT grown, which never terminates).
	if len(lexicon) != size:
		return inferlexicon(exemplars, verbose, lexicon, debug)
	return lexicon
714
def conciliate(meaning, partialmeaning, subst=None, debug=dbg):
	""" Test whether meaning can be made compatible with partialmeaning,
	by looking for a family resemblance with partialmeanings,
	if found, perform a substitution if necessary. Returns empty string on failure.

	>>> conciliate('question: animal(bunny) do(X)', 'assertion: point(dog) animal(dog)')
	substituted (bunny) for (dog)
	'assertion: point(bunny) animal(bunny)'
	>>> conciliate('assertion: do(hop) animal(bunny)', 'assertion: animal(bunny) do(X)')
	instantiated (X) with (hop)
	'assertion: animal(bunny) do(hop)'
	>>> conciliate('assertion: do(hop) animal(bunny)', 'assertion: point(bunny)')
	''
	"""
	pm = partialmeaning
	if subst == None:
		# By default any clause of the partial meaning may be substituted.
		subst = pm.split()
	if pm == '':
		return meaning
	success = False
	for part in meaning.split():
		if pred(part) + '(' in pm:
			# Shared predicate: substitute or instantiate its argument
			# when the arguments differ and ours is not a variable.
			if arg(part) and not (var(arg(part)) or arg(part) in pm):
				oldarg = tokens(pm)[tokens(pm).index(pred(part)) + 1]
				# Only touch clauses sanctioned by subst.
				if True in [pred(part) in a for a in subst]:
					if oldarg[1] in uppercase:
						debug("instantiated", oldarg, "with", arg(part))
					else:
						debug("substituted", arg(part), "for", oldarg)
					partialmeaning = pm = pm.replace(oldarg, arg(part))
					success = True

		# A variable predicate in the meaning matches anything.
		if pred(part) in uppercase:
			return partialmeaning

		varpreds = [pred(a) for a in pm.split() if pred(a) in uppercase]
		# Instantiate a variable predicate in the partial meaning with
		# this (non-operator) predicate, then retry.
		if varpreds and ':' not in part and pred(part) not in pm:
			partialmeaning = pm.replace(varpreds[0], pred(part))
			debug("instantiated variable predicate with", pred(part))
			return conciliate(meaning, partialmeaning, subst, debug)
	# this code would allow to substitute predicates if arguments match:
	# elif arg(part) in pm and arg(part):
	#     ar = arg(part)
	#     oldpred = pm[pm.index(ar):pm.index(ar)+len(ar)]
	#     print "substituted", pred(part), "for", oldpred
	#     partialmeaning[0] = pm.replace(oldpred, pred(part))
	#     return True
	if success:
		return partialmeaning
	return ''
784
def unifies(meaning, partialmeaning, debug=dbg):
	""" succeed if everything in "partialmeaning" is compatible with "meaning",
	substituting along the way. Similar to conciliate() but operates on the
	whole meaning instead of looking at individual clauses, and does not
	perform substitution, merely instantiation.

	>>> unifies('assertion: do(hop) animal(bunny)', 'assertion: do(X) animal(bunny)')
	substituted (hop) for (X)
	'assertion: do(hop) animal(bunny)'
	>>> unifies('assertion: do(hop) animal(bunny)', 'assertion: animal(bunny) do(X)')
	''
	"""
	pm = partialmeaning
	if pm == '':
		return meaning

	# Clause-by-clause comparison in lockstep.
	for part1, part2 in zip(pm.split(), meaning.split()):
		if part1 == part2:
			continue
		elif var(arg(part1)):
			# Instantiate our variable with the corresponding argument.
			newarg = tokens(meaning)[tokens(meaning).index(pred(part2)) + 1]
			debug("substituted", newarg, "for", arg(part1))
			partialmeaning = pm = pm.replace(arg(part1), newarg)
		else:
			# Mismatch on a non-variable clause: fail.
			return ''
	return partialmeaning
815
def getexemplars():
	""" Obtain corpus, either by importing from module `exemplars,'
	or by falling back to a small sample corpus. """
	try:
		# The real corpus lives in an optional external module.
		from exemplars import getexemplars
	except ImportError:
		pass
	else:
		return getexemplars()

	# Built-in sample corpus: utterance -> meaning representation.
	return {'uh': '',
		'eh': '',
		'ah': 'acknowledgement',
		'ok': 'confirmation',
		'yes': 'confirmation',
		'yeah': 'confirmation',
		'no': 'denial',
		'that\'s right !' : 'reinforcement',
		'ball' : 'assertion: point(ball) toy(ball)',
		'throw it to me' : 'imperative: throw[0](X) toy(X)',
		'you kick the football !' : 'imperative: kick(football) toy(football)',
		'what is this ?' : 'whquestion: point(X) Y(X)',
		'is that a cow ?' : 'whquestion: point(cow) animal(cow)',
		'what does a bunny do ?': 'whquestion: do(X) animal(bunny)',
		'what does a cow say ?' : 'whquestion: do(X) animal(cow)',
		'what does a duckie say ?': 'whquestion: do(X) animal(duck[3])',
		'what animal does woof woof ?': 'whquestion: animal(X) do(woof)',
		'what\'s a kitty say ?' : 'whquestion: do(X) animal(cat[2])',
		'that\'s a kitty' : 'assertion: point(cat) animal(cat[2])',
		'donkey' : 'assertion: point(donkey) animal(donkey)',
		'that\'s a donkey' : 'assertion: point(donkey) animal(donkey)',
		'meouw' : 'assertion: do(meouw) animal(cat)',
		'quack' : 'assertion: do(quack) animal(duck)',
		'moo' : 'assertion: do(moo) animal(cow)',
		'bunny' : 'assertion: animal(bunny) do(hop)',
		'dog' : 'assertion: animal(dog) do(woof)',
		'hop' : 'assertion: do(hop) animal(bunny)',
		'woof woof' : 'assertion: do(woof) animal(dog)',
		'want some juice ?' : 'ynquestion: want(juice) food(juice)',
		'chocolate' : 'assertion: food(chocolate)',
		'what\'s inside ?' : 'whquestion: inside(X) toy(box)',
		'book' : 'assertion: inside(book) toy(box)',
		'book !' : 'assertion: point(book)'
		}
866
868 """ Interpret some sample utterances. """
869 test = [
870 'what does a bunny do ?',
871 'kitty do ?',
872 'throw the ball',
873 'catch the ball',
874 'where lives birdie ?',
875 "that's a ball",
876 'can you throw it ?',
877 'what does a duckie say ?',
878 'what does this animal [=cow] do ?',
879 'do you want to play with that [=ball] ?',
880 'what does a ball say ?'
881
882
883
884 ]
885
886 exemplars = getexemplars()
887 lexicon = inferlexicon(exemplars)
888 for a in test:
889 print a
890 b = interpret(a, '', exemplars, lexicon, nodbg)
891 print 'interpretation:', b
892
def edit_dist(source, target):
	""" Edit distance of two sequences. Non-standard features are that all
	mutations have a cost of 1 (so as not to favor insertions or deletions
	over substitutions), as well as a decay based on index
	(mutations with higher indices weigh less).

	>>> edit_dist('foo', 'bar')
	2.4890507991136213"""
	def substcost(a, b):
		""" Substitution is free only when the elements are equal. """
		return 0 if a == b else 1

	def decay(n):
		""" Weight of a mutation at index n: exponentially decreasing. """
		return math.e ** (-0.2 * n)

	# DP table: distance[i][j] = cost between source[:i] and target[:j].
	distance = [[0] * (len(target) + 1) for b in range(len(source) + 1)]
	# Borders: transforming from/to the empty prefix costs its length.
	for i, row in enumerate(distance):
		row[0] = i
	for j in range(len(distance[0])):
		distance[0][j] = j
	distance[0][0] = 0

	for i, a in enumerate(source):
		for j, b in enumerate(target):
			insc = distance[i][j + 1] + decay(i)
			substc = distance[i][j] + decay(i) * substcost(a, b)
			delc = distance[i + 1][j] + decay(i)
			distance[i + 1][j + 1] = min((substc, insc, delc))
	return distance[-1][-1]
930
def substr_iter(seq):
	""" Return all substrings of a sequence, in descending order of length.

	>>> list(substr_iter('abc'))
	['abc', 'ab', 'bc', 'a', 'b', 'c']"""
	length = len(seq)
	for size in range(length, -1, -1):
		for end in range(size, length):
			yield seq[end - size:end + 1]
939
def revdict(d):
	""" Reverse dictionary, ie. swap values and keys; since a value may occur
	with multiple keys, return a list of all keys associated with a value.

	>>> revdict({0: 1, 1: 2})
	{1: [0], 2: [1]}"""
	# Single pass instead of re-scanning d.items() for every value
	# (the original was quadratic in len(d)).
	result = {}
	for key, value in d.items():
		result.setdefault(value, []).append(key)
	return result
949
def varclauses(m):
	""" >>> varclauses(['foo(bar) zed(X)'])
	['zed(X)']"""
	clauses = m[0].split()
	return [clause for clause in clauses if var(arg(clause))]
954
def pred(token):
	""" >>> pred('foo(bar)')
	'foo'"""
	# everything before the first '(' — the whole token if there is none
	return token.partition('(')[0]
961
def arg(token):
	""" >>> arg('foo(bar)')
	'(bar)'"""
	# everything from the first '(' onward; '' when there is none
	before, paren, after = token.partition('(')
	return paren + after
968
def var(arg):
	""" Test whether an argument string is variable:

	>>> var('(bar)')
	False
	>>> var('(X)')
	True
	"""
	# The second-character test is guarded so one-character strings do
	# not raise IndexError (the original indexed arg[1] unconditionally).
	return bool(arg) and (arg[0] in uppercase
			or (len(arg) > 1 and arg[1] in uppercase))
978
def tokens(m):
	""" Turn meaning into a list of operator, predicates and arguments.

	>>> tokens('whquestion: do(X) animal(bunny)')
	['whquestion:', 'do', '(X)', 'animal', '(bunny)']
	"""
	# insert a space before each '(' so arguments split off as tokens
	separated = m.replace('(', ' (')
	return separated.split()
986
def intersect(a, b):
	""" >>> intersect([1,2,3], [2,3])
	set([2, 3])"""
	return set(a) & set(b)
991
992
993 if __name__ == '__main__':
994 import doctest
995
996
997 fail, attempted = doctest.testmod(verbose=False,
998 optionflags=doctest.NORMALIZE_WHITESPACE | doctest.ELLIPSIS)
999 if attempted and not fail:
1000 print "%d doctests succeeded!" % attempted
1001 main()
1002