-
Notifications
You must be signed in to change notification settings - Fork 2
/
index.html
1612 lines (1586 loc) · 216 KB
/
index.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en"><head>
<meta charset="utf-8">
<meta name="generator" content="quarto-1.3.450">
<meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=yes">
<meta name="author" content="Edouard Legoupil, UNHCR Evaluation Office">
<meta name="dcterms.date" content="2024-05-23">
<title>Retrieval-Augmented Generation (RAG) - Technical approach paper on the systematic application of AI in evaluation synthesis and summarization</title>
<style>
code{white-space: pre-wrap;}
span.smallcaps{font-variant: small-caps;}
div.columns{display: flex; gap: min(4vw, 1.5em);}
div.column{flex: auto; overflow-x: auto;}
div.hanging-indent{margin-left: 1.5em; text-indent: -1.5em;}
ul.task-list{list-style: none;}
ul.task-list li input[type="checkbox"] {
width: 0.8em;
margin: 0 0.8em 0.2em -1em; /* quarto-specific, see https://github.com/quarto-dev/quarto-cli/issues/4556 */
vertical-align: middle;
}
/* CSS for syntax highlighting */
pre > code.sourceCode { white-space: pre; position: relative; }
pre > code.sourceCode > span { display: inline-block; line-height: 1.25; }
pre > code.sourceCode > span:empty { height: 1.2em; }
.sourceCode { overflow: visible; }
code.sourceCode > span { color: inherit; text-decoration: inherit; }
div.sourceCode { margin: 1em 0; }
pre.sourceCode { margin: 0; }
@media screen {
div.sourceCode { overflow: auto; }
}
@media print {
pre > code.sourceCode { white-space: pre-wrap; }
pre > code.sourceCode > span { text-indent: -5em; padding-left: 5em; }
}
pre.numberSource code
{ counter-reset: source-line 0; }
pre.numberSource code > span
{ position: relative; left: -4em; counter-increment: source-line; }
pre.numberSource code > span > a:first-child::before
{ content: counter(source-line);
position: relative; left: -1em; text-align: right; vertical-align: baseline;
border: none; display: inline-block;
-webkit-touch-callout: none; -webkit-user-select: none;
-khtml-user-select: none; -moz-user-select: none;
-ms-user-select: none; user-select: none;
padding: 0 4px; width: 4em;
}
pre.numberSource { margin-left: 3em; padding-left: 4px; }
div.sourceCode
{ }
@media screen {
pre > code.sourceCode > span > a:first-child::before { text-decoration: underline; }
}
</style>
<script src="index_files/libs/clipboard/clipboard.min.js"></script>
<script src="index_files/libs/quarto-html/quarto.js"></script>
<script src="index_files/libs/quarto-html/popper.min.js"></script>
<script src="index_files/libs/quarto-html/tippy.umd.min.js"></script>
<script src="index_files/libs/quarto-html/anchor.min.js"></script>
<link href="index_files/libs/quarto-html/tippy.css" rel="stylesheet">
<link href="index_files/libs/quarto-html/quarto-syntax-highlighting.css" rel="stylesheet" id="quarto-text-highlighting-styles">
<script src="index_files/libs/bootstrap/bootstrap.min.js"></script>
<link href="index_files/libs/bootstrap/bootstrap-icons.css" rel="stylesheet">
<link href="index_files/libs/bootstrap/bootstrap.min.css" rel="stylesheet" id="quarto-bootstrap" data-mode="light">
<style>html{ scroll-behavior: smooth; }</style>
<link rel="shortcut icon" href="">
</head>
<body>
<div id="quarto-content" class="page-columns page-rows-contents page-layout-article">
<div id="quarto-margin-sidebar" class="sidebar margin-sidebar">
<nav id="TOC" role="doc-toc" class="toc-active">
<h2 id="toc-title">Table of contents</h2>
<ul>
<li><a href="#executive-summary" id="toc-executive-summary" class="nav-link active" data-scroll-target="#executive-summary">Executive Summary</a></li>
<li><a href="#introduction" id="toc-introduction" class="nav-link" data-scroll-target="#introduction">Introduction</a>
<ul class="collapse">
<li><a href="#environment-set-up" id="toc-environment-set-up" class="nav-link" data-scroll-target="#environment-set-up">Environment Set up</a></li>
</ul></li>
<li><a href="#retrieval-generation-pipeline" id="toc-retrieval-generation-pipeline" class="nav-link" data-scroll-target="#retrieval-generation-pipeline">Retrieval & Generation Pipeline</a>
<ul class="collapse">
<li><a href="#information-retrieval" id="toc-information-retrieval" class="nav-link" data-scroll-target="#information-retrieval">Information Retrieval</a></li>
<li><a href="#content-generation" id="toc-content-generation" class="nav-link" data-scroll-target="#content-generation">Content Generation</a></li>
</ul></li>
<li><a href="#continuous-evaluation-process" id="toc-continuous-evaluation-process" class="nav-link" data-scroll-target="#continuous-evaluation-process">Continuous Evaluation Process</a>
<ul class="collapse">
<li><a href="#building-alternative-briefs" id="toc-building-alternative-briefs" class="nav-link" data-scroll-target="#building-alternative-briefs">Building Alternative Briefs</a></li>
<li><a href="#generating-evaluation-dataset" id="toc-generating-evaluation-dataset" class="nav-link" data-scroll-target="#generating-evaluation-dataset">Generating Evaluation Dataset</a></li>
<li><a href="#computing-assessment-metrics" id="toc-computing-assessment-metrics" class="nav-link" data-scroll-target="#computing-assessment-metrics">Computing Assessment Metrics</a></li>
</ul></li>
<li><a href="#production-deployment-strategy" id="toc-production-deployment-strategy" class="nav-link" data-scroll-target="#production-deployment-strategy">Production Deployment Strategy</a>
<ul class="collapse">
<li><a href="#buy-or-build" id="toc-buy-or-build" class="nav-link" data-scroll-target="#buy-or-build">Buy or Build?</a></li>
<li><a href="#ai-ready-data-human-review-for-ground_truth" id="toc-ai-ready-data-human-review-for-ground_truth" class="nav-link" data-scroll-target="#ai-ready-data-human-review-for-ground_truth">“AI-Ready” data: Human Review for <code>ground_truth</code></a></li>
<li><a href="#a-fine-tuned-expert-model" id="toc-a-fine-tuned-expert-model" class="nav-link" data-scroll-target="#a-fine-tuned-expert-model">A Fine-Tuned “expert” Model!</a></li>
</ul></li>
<li><a href="#conclusions" id="toc-conclusions" class="nav-link" data-scroll-target="#conclusions">Conclusions</a></li>
<li><a href="#acknowledgement" id="toc-acknowledgement" class="nav-link" data-scroll-target="#acknowledgement">Acknowledgement</a></li>
</ul>
</nav>
</div>
<main class="content page-columns page-full" id="quarto-document-content">
<header id="title-block-header" class="quarto-title-block default">
<div class="quarto-title">
<h1 class="title">Retrieval-Augmented Generation (RAG) - Technical approach paper on the systematic application of AI in evaluation synthesis and summarization</h1>
<p class="subtitle lead">Evaluating <code>AI</code> Usage for Evaluation Purpose</p>
</div>
<div class="quarto-title-meta">
<div>
<div class="quarto-title-meta-heading">Author</div>
<div class="quarto-title-meta-contents">
<p>Edouard Legoupil, UNHCR Evaluation Office </p>
</div>
</div>
<div>
<div class="quarto-title-meta-heading">Published</div>
<div class="quarto-title-meta-contents">
<p class="date">23 May 2024</p>
</div>
</div>
</div>
</header>
<div class="bg-primary text-white p-3 column-page">
<p><em>“We are drowning in information, while starving for wisdom. The world henceforth will be run by synthesizers, people able to put together the right information at the right time, think critically about it, and make important choices wisely.”</em> - Edward Osborne Wilson</p>
</div>
<section id="executive-summary" class="level2">
<h2 class="anchored" data-anchor-id="executive-summary">Executive Summary</h2>
<p>Artificial Intelligence (AI) is presented as the potential trigger for the <a href="https://www.nature.com/articles/s41599-019-0253-6" target="_blank">fifth wave</a> of the <strong>evidence revolution</strong> (following the four previous ones: <em>1. Outcome Monitoring, 2. Impact Evaluation, 3. Systematic Reviews and 4. Knowledge Brokering</em>). This actually reflects a situation where, considering the <a href="https://www.uneval.org/evaluation/reports" target="_blank">number of published evaluation reports</a> across the UN system, <strong>information retrieval and evidence generalization challenges</strong> have arisen: how to extract lessons and learning across contexts, institutions, programs, and evaluations in order to inform strategies and decision-making in other similar contexts?</p>
<div class="columns">
<div class="column" style="width:60%;">
<p>The key deliverable from an evaluation is usually a long report (<em>often a PDF file of over 60 pages</em>). From this report, two-page executive “briefs” are usually designed for the consumption of a broader audience including senior executives. Striking the balance between breadth and depth is a common challenge, but what remains even more challenging is the subjective dimension involved in <strong>choosing what to include and what to exclude</strong>. Highlighting critical aspects while deciding which less relevant details to omit relies on people’s judgment as to what is important for specific audiences… The potential fear of being, like <em>Cassandra</em> in Greek mythology, the bearer of bad news comes with the structural risk of “cushioning” the real evaluation findings to a point where they get hidden. Relying on automated retrieval can therefore help improve the objectivity and independence of the evaluation report summarization.</p>
</div><div class="column" style="width:40%;">
<p><img src="img/de-munt-cassandra-afgebeeld-in-een-script-van-shakespeares-troilus-and-cressida--mzk5njg5mzgzmw.jpg" class="img-fluid" data-fit-align="center" alt="Cassandra as depicted in a script of Shakespeare’s Troilus and Cressida"></p>
</div>
</div>
<p>Retrieval-augmented generation (RAG) is an AI Question-Answering framework that <a href="https://arxiv.org/pdf/2005.11401">surfaced in 2020</a> and that synergizes the capabilities of Large Language Models (LLMs) and information retrieval systems from a specific domain of expertise (hereafter “evaluation reports”). This paper presents the challenges and opportunities associated with this approach in the context of evaluation. It then suggests a potential solution and way forward.</p>
<p>First, we explain how to create an initial <a href="https://github.com/Edouard-Legoupil/rag_extraction/raw/main/generated/Evaluation_Brief_response_recursivecharactertext_bert.docx" target="_blank">two-page evaluation brief</a> using an orchestration of functions and models from <a href="https://huggingface.co/docs/hub/index" target="_blank">Hugging Face Hub</a>. Rather than relying on ad-hoc user interactions through a <em>black-box point & click</em> chat interface, a relevant alternative is to use a data science approach with documented and <strong>reproducible scripts</strong> that can directly output a Word document. The same approach could actually be applied to other textual analysis needs, for instance: extracting causal chains from the transcriptions of Focus Group Discussions, performing Quality Assurance review on key documents, generating potential theories of change from needs assessment reports or assessing sufficient usage of programmatic evidence when developing a Strategic Plan for an Operation.</p>
<p>Second, we review the techniques that can be used to <strong>evaluate the performance</strong> of summarisation scripts, both to optimize them and to minimize the risk of AI hallucinations and misalignment. We generate alternative briefs (<a href="https://github.com/Edouard-Legoupil/rag_extraction/raw/main/generated/Evaluation_Brief_response_mmr_recursivecharactertext_bge.docx" target="_blank">#2</a>, <a href="https://github.com/Edouard-Legoupil/rag_extraction/raw/main/generated/Evaluation_Brief_response_parent_recursivecharactertext_bge.docx" target="_blank">#3</a>, <a href="https://github.com/Edouard-Legoupil/rag_extraction/raw/main/generated/Evaluation_Brief_response_ensemble_recursivecharactertext_bge.docx" target="_blank">#4</a>) and then create a specific <a href="https://github.com/Edouard-Legoupil/rag_extraction/tree/main/dataset" target="_blank">test dataset</a> to explore the different metrics that can be used to evaluate the information retrieval process.</p>
<p>Last, we discuss how such an approach can actually inform decisions and strategies for an <strong>efficient AI deployment</strong>: while improving the RAG pipeline is the first important step, creating a training dataset with a human in the loop makes it possible to “ground truth” and “fine-tune” an existing model. This not only further increases its performance but also ensures its reliability, both for evidence retrieval and, at a later stage, for learning across systems and contexts.</p>
<p>A short presentation is also <a href="https://edouard-legoupil.github.io/rag_extraction/prez/prez.html" target="_blank">available here</a>.</p>
<hr>
</section>
<section id="introduction" class="level2">
<h2 class="anchored" data-anchor-id="introduction">Introduction</h2>
<p>Building a robust information retrieval system requires the configuration of different components:</p>
<ol type="1">
<li><p><strong>A Retrieval & Generation Pipeline</strong>: Build a knowledge base and configure how to retrieve the information from it then define efficient prompt to query the system;</p></li>
<li><p><strong>A Continuous Evaluation Process</strong>: Explore and combine various options for both Retrieval and Generation to compare the results.</p></li>
<li><p><strong>A Production Deployment Strategy</strong>: Organise AI-ready human feedback and prepare data for fine-tuning.</p></li>
</ol>
<p>This paper compiles the results of experimentation applied to a practical use case. It includes a <em>cookbook with reproducible recipes</em> so that colleagues can <a href="https://github.com/Edouard-Legoupil/rag_extraction/" target="_blank">rerun and learn</a> from it. It also contains broader suggestions on the usage of AI for summarizing and synthesizing evaluation products and reports.</p>
<p>A non-technical audience can read the <a href="#executive-summary">executive summary</a> above, the <a href="#conclusions">conclusions</a> and the <a href="https://edouard-legoupil.github.io/rag_extraction/prez/prez.html" target="_blank">linked presentation</a>.</p>
<section id="environment-set-up" class="level3">
<h3 class="anchored" data-anchor-id="environment-set-up">Environment Set up</h3>
<p>The body of this document targets a technical audience that may consider including such techniques within their personal information management toolkit, while working safely and fully offline on their own computer. To capture this audience’s interest, we used the <a href="https://www.unhcr.org/sites/default/files/legacy-pdf/5dd4f7d24.pdf" target="_blank">2019 Evaluation of UNHCR’s data use and information management approaches</a> for the demo. Readers should be able to adjust this tutorial to their own use cases and are welcome to ask questions and share comments through a <a href="https://github.com/Edouard-Legoupil/rag_extraction/issues/new" target="_blank">ticket in the source repository</a>!</p>
<p>The world of LLMs is a <a href="https://www.python.org/downloads/" target="_blank">Python</a> one. The scripts below are based on the <a href="https://python.langchain.com/v0.1/docs/use_cases/question_answering/" target="_blank">LangChain Python module</a>, but the same pipeline could also be built with another LLM orchestration module like <a href="https://docs.llamaindex.ai/en/stable/use_cases/q_and_a/" target="_blank">LlamaIndex</a>.</p>
<p>Make sure to install the latest stable version of Python and create a dedicated Python environment, so that you have a fresh install in which all the dependencies between packages are managed correctly. This can be done with the <a href="https://docs.conda.io/en/latest/" target="_blank">conda</a> Python module management utility.</p>
<p>First directly in your OS Shell, create a new environment - here called <strong><code>evalenv</code></strong></p>
<blockquote class="blockquote">
<p>conda create --name evalenv python=3.11</p>
</blockquote>
<p>Then activate it! Et voila!</p>
<blockquote class="blockquote">
<p>conda activate evalenv</p>
</blockquote>
<p>Once this environment is selected as the kernel to run the notebook, we can install the required Python modules for RAG:</p>
<div class="cell" data-execution_count="1">
<div class="sourceCode cell-code" id="cb1"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb1-1"><a href="#cb1-1" aria-hidden="true" tabindex="-1"></a><span class="co">## Library to load the PDF</span></span>
<span id="cb1-2"><a href="#cb1-2" aria-hidden="true" tabindex="-1"></a><span class="op">%</span>pip install <span class="op">--</span>upgrade <span class="op">--</span>quiet pypdf</span>
<span id="cb1-3"><a href="#cb1-3" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb1-4"><a href="#cb1-4" aria-hidden="true" tabindex="-1"></a><span class="co">## Library for chunking</span></span>
<span id="cb1-5"><a href="#cb1-5" aria-hidden="true" tabindex="-1"></a><span class="op">%</span>pip install <span class="op">--</span>upgrade <span class="op">--</span>quiet tiktoken</span>
<span id="cb1-6"><a href="#cb1-6" aria-hidden="true" tabindex="-1"></a><span class="op">%</span>pip install <span class="op">--</span>upgrade <span class="op">--</span>quiet nltk</span>
<span id="cb1-7"><a href="#cb1-7" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb1-8"><a href="#cb1-8" aria-hidden="true" tabindex="-1"></a><span class="co">## Library for the embedding</span></span>
<span id="cb1-9"><a href="#cb1-9" aria-hidden="true" tabindex="-1"></a><span class="op">%</span>pip install <span class="op">--</span>upgrade <span class="op">--</span>quiet gpt4all</span>
<span id="cb1-10"><a href="#cb1-10" aria-hidden="true" tabindex="-1"></a><span class="op">%</span>pip install <span class="op">--</span>upgrade <span class="op">--</span>quiet sentence<span class="op">-</span>transformers</span>
<span id="cb1-11"><a href="#cb1-11" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb1-12"><a href="#cb1-12" aria-hidden="true" tabindex="-1"></a><span class="co">## Library to store the embeddings in a vector DB</span></span>
<span id="cb1-13"><a href="#cb1-13" aria-hidden="true" tabindex="-1"></a><span class="op">%</span>pip install <span class="op">--</span>upgrade <span class="op">--</span>quiet chromadb</span>
<span id="cb1-14"><a href="#cb1-14" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb1-15"><a href="#cb1-15" aria-hidden="true" tabindex="-1"></a><span class="co">## Library for information retrieval</span></span>
<span id="cb1-16"><a href="#cb1-16" aria-hidden="true" tabindex="-1"></a><span class="op">%</span>pip install <span class="op">--</span>upgrade <span class="op">--</span>quiet rank_bm25</span>
<span id="cb1-17"><a href="#cb1-17" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb1-18"><a href="#cb1-18" aria-hidden="true" tabindex="-1"></a><span class="co">## Library for the LLM interaction</span></span>
<span id="cb1-19"><a href="#cb1-19" aria-hidden="true" tabindex="-1"></a><span class="op">%</span>pip install <span class="op">--</span>upgrade <span class="op">--</span>quiet langchain</span>
<span id="cb1-20"><a href="#cb1-20" aria-hidden="true" tabindex="-1"></a><span class="op">%</span>pip install <span class="op">--</span>upgrade <span class="op">--</span>quiet langchain<span class="op">-</span>community</span>
<span id="cb1-21"><a href="#cb1-21" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb1-22"><a href="#cb1-22" aria-hidden="true" tabindex="-1"></a><span class="co">## Library to save the results in a word document</span></span>
<span id="cb1-23"><a href="#cb1-23" aria-hidden="true" tabindex="-1"></a><span class="op">%</span>pip install <span class="op">--</span>upgrade <span class="op">--</span>quiet python<span class="op">-</span>docx </span>
<span id="cb1-24"><a href="#cb1-24" aria-hidden="true" tabindex="-1"></a><span class="op">%</span>pip install <span class="op">--</span>upgrade <span class="op">--</span>quiet markdown</span>
<span id="cb1-25"><a href="#cb1-25" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb1-26"><a href="#cb1-26" aria-hidden="true" tabindex="-1"></a><span class="co">## Library to evaluate the RAG process</span></span>
<span id="cb1-27"><a href="#cb1-27" aria-hidden="true" tabindex="-1"></a><span class="op">%</span>pip install <span class="op">--</span>upgrade <span class="op">--</span>quiet datasets</span>
<span id="cb1-28"><a href="#cb1-28" aria-hidden="true" tabindex="-1"></a><span class="op">%</span>pip install <span class="op">--</span>upgrade <span class="op">--</span>quiet ragas </span>
<span id="cb1-29"><a href="#cb1-29" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb1-30"><a href="#cb1-30" aria-hidden="true" tabindex="-1"></a><span class="co">## Library to save evaluation dataset in excel</span></span>
<span id="cb1-31"><a href="#cb1-31" aria-hidden="true" tabindex="-1"></a><span class="op">%</span>pip install <span class="op">--</span>upgrade <span class="op">--</span>quiet pandas</span>
<span id="cb1-32"><a href="#cb1-32" aria-hidden="true" tabindex="-1"></a><span class="op">%</span>pip install <span class="op">--</span>upgrade <span class="op">--</span>quiet openpyxl</span>
<span id="cb1-33"><a href="#cb1-33" aria-hidden="true" tabindex="-1"></a><span class="op">%</span>pip install <span class="op">--</span>upgrade <span class="op">--</span>quiet plotly</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</div>
<div class="cell" data-execution_count="2">
<div class="sourceCode cell-code" id="cb2"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb2-1"><a href="#cb2-1" aria-hidden="true" tabindex="-1"></a><span class="co"># then Restart the jupyter kernel for this notebook</span></span>
<span id="cb2-2"><a href="#cb2-2" aria-hidden="true" tabindex="-1"></a><span class="op">%</span>reset <span class="op">-</span>f</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</div>
</section>
</section>
<section id="retrieval-generation-pipeline" class="level2">
<h2 class="anchored" data-anchor-id="retrieval-generation-pipeline">Retrieval & Generation Pipeline</h2>
<p>The illustration from <a href="https://huggingface.co/learn/cookbook/en/rag_evaluation" target="_blank">HuggingFace RAG Evaluation</a> below nicely visualizes the first two elements of the system architecture: retrieval (that includes: chunking, embedding, storing and retrieving) and generation (that includes prompting an LLM).</p>
<div class="quarto-figure quarto-figure-center">
<figure class="figure">
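<p><img src="img/RAG_workflow.png" class="img-fluid figure-img" alt="Diagram of the RAG workflow: retrieval (chunking, embedding, storing and retrieving) followed by generation with an LLM"></p>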
<figcaption class="figure-caption">RAG Evaluation, https://huggingface.co/learn/cookbook/en/rag_evaluation</figcaption>
</figure>
</div>
<section id="information-retrieval" class="level3">
<h3 class="anchored" data-anchor-id="information-retrieval">Information Retrieval</h3>
<section id="load-the-pdf" class="level4">
<h4 class="anchored" data-anchor-id="load-the-pdf">Load the PDF</h4>
<p>There are plenty of Python packages available to load PDF files… More details <a href="https://python.langchain.com/docs/modules/data_connection/document_loaders/pdf" target="_blank">here</a>. Note that more loaders also exist for other types of data.</p>
<div class="cell" data-execution_count="3">
<div class="sourceCode cell-code" id="cb3"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb3-1"><a href="#cb3-1" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> langchain_community.document_loaders <span class="im">import</span> PyPDFLoader</span>
<span id="cb3-2"><a href="#cb3-2" aria-hidden="true" tabindex="-1"></a>loader <span class="op">=</span> PyPDFLoader(<span class="st">"files/Info_Mngt_eval_2019.pdf"</span>)</span>
<span id="cb3-3"><a href="#cb3-3" aria-hidden="true" tabindex="-1"></a>docs <span class="op">=</span> loader.load_and_split()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</div>
</section>
<section id="chunking" class="level4">
<h4 class="anchored" data-anchor-id="chunking">Chunking</h4>
<p>If you have a large document, because of memory management, you will not be able to process it in one chunk. LangChain offers several built-in text splitters to <strong>divide text into smaller chunks</strong> based on different criteria.</p>
<p>Examples of options that can be tested are:</p>
<ul>
<li><p>Simple character-level processing with <code>CharacterTextSplitter</code>,</p></li>
<li><p>Recursive Splitting with <code>RecursiveCharacterTextSplitter</code>,</p></li>
<li><p>Words or semantic units with <code>TokenTextSplitter</code>,</p></li>
<li><p>Context-aware splitting with <code>NLTKTextSplitter</code>.</p></li>
</ul>
<p>To understand how chunking works, see this online <a href="https://huggingface.co/spaces/m-ric/chunk_visualizer" target="_blank">viz</a>.</p>
<div class="cell" data-execution_count="4">
<div class="sourceCode cell-code" id="cb4"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb4-1"><a href="#cb4-1" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> langchain.text_splitter <span class="im">import</span> CharacterTextSplitter</span>
<span id="cb4-2"><a href="#cb4-2" aria-hidden="true" tabindex="-1"></a>splitter_text <span class="op">=</span> CharacterTextSplitter(</span>
<span id="cb4-3"><a href="#cb4-3" aria-hidden="true" tabindex="-1"></a> chunk_size<span class="op">=</span><span class="dv">1000</span>, </span>
<span id="cb4-4"><a href="#cb4-4" aria-hidden="true" tabindex="-1"></a> chunk_overlap<span class="op">=</span><span class="dv">200</span></span>
<span id="cb4-5"><a href="#cb4-5" aria-hidden="true" tabindex="-1"></a>)</span>
<span id="cb4-6"><a href="#cb4-6" aria-hidden="true" tabindex="-1"></a>chunks_text <span class="op">=</span> splitter_text.split_documents(docs)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</div>
<div class="cell" data-execution_count="5">
<div class="sourceCode cell-code" id="cb5"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb5-1"><a href="#cb5-1" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> langchain.text_splitter <span class="im">import</span> RecursiveCharacterTextSplitter </span>
<span id="cb5-2"><a href="#cb5-2" aria-hidden="true" tabindex="-1"></a>splitter_recursivecharactertext <span class="op">=</span> RecursiveCharacterTextSplitter(</span>
<span id="cb5-3"><a href="#cb5-3" aria-hidden="true" tabindex="-1"></a> chunk_size<span class="op">=</span><span class="dv">1000</span>, </span>
<span id="cb5-4"><a href="#cb5-4" aria-hidden="true" tabindex="-1"></a> chunk_overlap<span class="op">=</span><span class="dv">200</span>,</span>
<span id="cb5-5"><a href="#cb5-5" aria-hidden="true" tabindex="-1"></a> add_start_index<span class="op">=</span><span class="va">True</span>,</span>
<span id="cb5-6"><a href="#cb5-6" aria-hidden="true" tabindex="-1"></a> separators<span class="op">=</span>[<span class="st">"</span><span class="ch">\n\n</span><span class="st">"</span>, <span class="st">"</span><span class="ch">\n</span><span class="st">"</span>, <span class="st">"."</span>, <span class="st">" "</span>, <span class="st">""</span>],</span>
<span id="cb5-7"><a href="#cb5-7" aria-hidden="true" tabindex="-1"></a>)</span>
<span id="cb5-8"><a href="#cb5-8" aria-hidden="true" tabindex="-1"></a>chunks_recursivecharactertext <span class="op">=</span> splitter_recursivecharactertext.split_documents(docs)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</div>
<div class="cell" data-execution_count="6">
<div class="sourceCode cell-code" id="cb6"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb6-1"><a href="#cb6-1" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> langchain.text_splitter <span class="im">import</span> TokenTextSplitter</span>
<span id="cb6-2"><a href="#cb6-2" aria-hidden="true" tabindex="-1"></a>splitter_tokentext <span class="op">=</span> TokenTextSplitter(</span>
<span id="cb6-3"><a href="#cb6-3" aria-hidden="true" tabindex="-1"></a> chunk_size<span class="op">=</span><span class="dv">1000</span>, </span>
<span id="cb6-4"><a href="#cb6-4" aria-hidden="true" tabindex="-1"></a> chunk_overlap<span class="op">=</span><span class="dv">200</span></span>
<span id="cb6-5"><a href="#cb6-5" aria-hidden="true" tabindex="-1"></a>)</span>
<span id="cb6-6"><a href="#cb6-6" aria-hidden="true" tabindex="-1"></a>chunks_tokentext <span class="op">=</span> splitter_tokentext.split_documents(docs)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</div>
<div class="cell" data-execution_count="7">
<div class="sourceCode cell-code" id="cb7"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb7-1"><a href="#cb7-1" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> langchain_text_splitters <span class="im">import</span> NLTKTextSplitter</span>
<span id="cb7-2"><a href="#cb7-2" aria-hidden="true" tabindex="-1"></a>splitter_nltktext <span class="op">=</span> NLTKTextSplitter(chunk_size<span class="op">=</span><span class="dv">1000</span>)</span>
<span id="cb7-3"><a href="#cb7-3" aria-hidden="true" tabindex="-1"></a>chunks_nltktext <span class="op">=</span> splitter_nltktext.split_documents(docs)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</div>
</section>
<section id="instantiate-a-vector-database-and-generate-embedding" class="level4">
<h4 class="anchored" data-anchor-id="instantiate-a-vector-database-and-generate-embedding">Instantiate a Vector Database and Generate Embedding</h4>
<p>A <a href="https://python.langchain.com/docs/modules/data_connection/vectorstores/" target="_blank">vector database</a> is a database that makes it possible to store and query embeddings efficiently. Embeddings are <strong>numeric representations of text data</strong>. This conversion from text to numbers is used to represent words, sentences, or even entire documents in a compact and meaningful way. It captures the essence of a word’s meaning, context, and relationships with other words.</p>
<p>Vector databases extend the capabilities of traditional relational databases to embeddings. However, the key distinguishing feature of a vector database is that query results aren’t an exact match to the query. Instead, using a specified <strong>similarity metric</strong>, the vector database returns data that is similar to the query.</p>
<p>Here again, there are numerous open-source vector databases that can be used, for instance: <a href="https://www.trychroma.com/" target="_blank">ChromaDB</a>, <a href="https://qdrant.tech/" target="_blank">Qdrant</a>, <a href="https://milvus.io/" target="_blank">Milvus</a> or <a href="https://engineering.fb.com/2017/03/29/data-infrastructure/faiss-a-library-for-efficient-similarity-search/" target="_blank">FAISS</a>. Here we will just use Chroma.</p>
<div class="cell" data-execution_count="8">
<div class="sourceCode cell-code" id="cb8"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb8-1"><a href="#cb8-1" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> langchain_community.vectorstores <span class="im">import</span> Chroma</span>
<span id="cb8-2"><a href="#cb8-2" aria-hidden="true" tabindex="-1"></a><span class="im">import</span> chromadb </span>
<span id="cb8-3"><a href="#cb8-3" aria-hidden="true" tabindex="-1"></a>chroma_client <span class="op">=</span> chromadb.PersistentClient(path<span class="op">=</span><span class="st">"persist/"</span>)</span>
<span id="cb8-4"><a href="#cb8-4" aria-hidden="true" tabindex="-1"></a><span class="co">## A collection is created with the following</span></span>
<span id="cb8-5"><a href="#cb8-5" aria-hidden="true" tabindex="-1"></a><span class="co">#chroma_collection = chroma_client.create_collection('collection')</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</div>
<p>To generate embeddings, we need a dedicated model, and there’s no single “best” option to select. Words with similar contexts tend to have closer vector representations. Some static word embedding models are good at capturing basic semantic relationships and are computationally efficient and fast, but might not capture complex semantics or context-dependent meanings. Contextual embedding models have been developed to capture word meaning based on context, considering surrounding words in a sentence and handling ambiguity. But this can lead to computationally expensive training and usage, and to a resulting embedding model with a large size.</p>
<p>Here we start with testing with the small 44MB <a href="https://docs.gpt4all.io/gpt4all_python_embedding.html" target="_blank">MiniLM embedding</a></p>
<div class="cell" data-execution_count="9">
<div class="sourceCode cell-code" id="cb9"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb9-1"><a href="#cb9-1" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> langchain_community.embeddings <span class="im">import</span> GPT4AllEmbeddings </span>
<span id="cb9-2"><a href="#cb9-2" aria-hidden="true" tabindex="-1"></a>embeddings_bert <span class="op">=</span> GPT4AllEmbeddings(</span>
<span id="cb9-3"><a href="#cb9-3" aria-hidden="true" tabindex="-1"></a> model_name <span class="op">=</span> <span class="st">"all-MiniLM-L6-v2.gguf2.f16.gguf"</span></span>
<span id="cb9-4"><a href="#cb9-4" aria-hidden="true" tabindex="-1"></a>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</div>
<p>Now we can store the embeddings and associated metadata in the <code>chroma vector</code> database using a specific collection name. Below we create a distinct store for each chunking option.</p>
<div class="cell" data-execution_count="10">
<div class="sourceCode cell-code" id="cb10"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb10-1"><a href="#cb10-1" aria-hidden="true" tabindex="-1"></a>vectorstore_text_bert <span class="op">=</span> Chroma.from_documents(</span>
<span id="cb10-2"><a href="#cb10-2" aria-hidden="true" tabindex="-1"></a> documents<span class="op">=</span>chunks_text, </span>
<span id="cb10-3"><a href="#cb10-3" aria-hidden="true" tabindex="-1"></a> embedding<span class="op">=</span>embeddings_bert, </span>
<span id="cb10-4"><a href="#cb10-4" aria-hidden="true" tabindex="-1"></a> collection_name<span class="op">=</span> <span class="st">"text_bert"</span>,</span>
<span id="cb10-5"><a href="#cb10-5" aria-hidden="true" tabindex="-1"></a> persist_directory <span class="op">=</span> <span class="st">"persist"</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</div>
<div class="cell" data-execution_count="11">
<div class="sourceCode cell-code" id="cb11"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb11-1"><a href="#cb11-1" aria-hidden="true" tabindex="-1"></a>vectorstore_recursivecharactertext_bert <span class="op">=</span> Chroma.from_documents(</span>
<span id="cb11-2"><a href="#cb11-2" aria-hidden="true" tabindex="-1"></a> documents<span class="op">=</span>chunks_recursivecharactertext,</span>
<span id="cb11-3"><a href="#cb11-3" aria-hidden="true" tabindex="-1"></a> embedding<span class="op">=</span>embeddings_bert,</span>
<span id="cb11-4"><a href="#cb11-4" aria-hidden="true" tabindex="-1"></a> collection_name<span class="op">=</span> <span class="st">"recursivecharactertext_bert"</span>,</span>
<span id="cb11-5"><a href="#cb11-5" aria-hidden="true" tabindex="-1"></a> persist_directory <span class="op">=</span> <span class="st">"persist"</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</div>
<div class="cell" data-execution_count="12">
<div class="sourceCode cell-code" id="cb12"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb12-1"><a href="#cb12-1" aria-hidden="true" tabindex="-1"></a>vectorstore_tokentext_bert <span class="op">=</span> Chroma.from_documents(</span>
<span id="cb12-2"><a href="#cb12-2" aria-hidden="true" tabindex="-1"></a> documents<span class="op">=</span>chunks_tokentext, </span>
<span id="cb12-3"><a href="#cb12-3" aria-hidden="true" tabindex="-1"></a> embedding<span class="op">=</span>embeddings_bert, </span>
<span id="cb12-4"><a href="#cb12-4" aria-hidden="true" tabindex="-1"></a> collection_name<span class="op">=</span> <span class="st">"tokentext_bert"</span>,</span>
<span id="cb12-5"><a href="#cb12-5" aria-hidden="true" tabindex="-1"></a> persist_directory <span class="op">=</span> <span class="st">"persist"</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</div>
<div class="cell" data-execution_count="13">
<div class="sourceCode cell-code" id="cb13"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb13-1"><a href="#cb13-1" aria-hidden="true" tabindex="-1"></a>vectorstore_nltktext_bert <span class="op">=</span> Chroma.from_documents(</span>
<span id="cb13-2"><a href="#cb13-2" aria-hidden="true" tabindex="-1"></a> documents<span class="op">=</span>chunks_nltktext, </span>
<span id="cb13-3"><a href="#cb13-3" aria-hidden="true" tabindex="-1"></a> embedding<span class="op">=</span>embeddings_bert, </span>
<span id="cb13-4"><a href="#cb13-4" aria-hidden="true" tabindex="-1"></a> collection_name<span class="op">=</span> <span class="st">"nltktext_bert"</span>,</span>
<span id="cb13-5"><a href="#cb13-5" aria-hidden="true" tabindex="-1"></a> persist_directory <span class="op">=</span> <span class="st">"persist"</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</div>
</section>
<section id="retrieve-embeddings-from-persistent-storage" class="level4">
<h4 class="anchored" data-anchor-id="retrieve-embeddings-from-persistent-storage">Retrieve embeddings from persistent storage</h4>
<p>We can re-open a previous database using its folder path:</p>
<div class="cell" data-execution_count="14">
<div class="sourceCode cell-code" id="cb14"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb14-1"><a href="#cb14-1" aria-hidden="true" tabindex="-1"></a><span class="im">import</span> chromadb</span>
<span id="cb14-2"><a href="#cb14-2" aria-hidden="true" tabindex="-1"></a>client <span class="op">=</span> chromadb.PersistentClient(path<span class="op">=</span><span class="st">"persist/"</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</div>
<p>Then we can get the names of the collections available within that database:</p>
<div class="cell" data-execution_count="15">
<div class="sourceCode cell-code" id="cb15"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb15-1"><a href="#cb15-1" aria-hidden="true" tabindex="-1"></a>collections <span class="op">=</span> client.list_collections()</span>
<span id="cb15-2"><a href="#cb15-2" aria-hidden="true" tabindex="-1"></a><span class="bu">print</span>(collections)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</div>
<p>and get a previously saved vector collection</p>
<div class="cell" data-execution_count="16">
<div class="sourceCode cell-code" id="cb16"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb16-1"><a href="#cb16-1" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> langchain_community.vectorstores <span class="im">import</span> Chroma</span>
<span id="cb16-2"><a href="#cb16-2" aria-hidden="true" tabindex="-1"></a>vectorstore_text_bert <span class="op">=</span> Chroma(collection_name<span class="op">=</span><span class="st">"text_bert"</span>,</span>
<span id="cb16-3"><a href="#cb16-3" aria-hidden="true" tabindex="-1"></a> persist_directory<span class="op">=</span><span class="st">"persist/"</span>, </span>
<span id="cb16-4"><a href="#cb16-4" aria-hidden="true" tabindex="-1"></a> embedding_function<span class="op">=</span>embeddings_bert) </span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</div>
</section>
</section>
<section id="content-generation" class="level3">
<h3 class="anchored" data-anchor-id="content-generation">Content Generation</h3>
<section id="set-up-a-local-llm" class="level4">
<h4 class="anchored" data-anchor-id="set-up-a-local-llm">Set up a local LLM</h4>
<p>If you do not have access to an LLM API, an alternative is to install a local one, and there are again plenty of foundation LLM options to select from. Foundation models are AI neural networks trained on massive amounts of raw data (usually with unsupervised learning) that can be adapted to a wide variety of tasks.</p>
<div class="callout callout-style-default callout-note callout-titled">
<div class="callout-header d-flex align-content-center">
<div class="callout-icon-container">
<i class="callout-icon"></i>
</div>
<div class="callout-title-container flex-fill">
Note
</div>
</div>
<div class="callout-body-container callout-body">
<p>Open-source Large Language Models (LLM) have multiple advantages:</p>
<ul>
<li><p><strong>Cost &amp; Energy Savings</strong>: generally more affordable in the long run as they don’t involve licensing fees once the infrastructure is set up, and/or they can be used offline on a local computer. More insights on the total cost of ownership can be gained <a href="https://huggingface.co/spaces/mithril-security/TCO_calculator" target="_blank">here</a>. Another element is that most open-source models have comparatively far fewer parameters (3b to 70b) than the large GPT ones (over 150b), which directly impacts inference costs, i.e. the computing cost to generate an answer.</p></li>
<li><p><strong>Data Protection</strong>: can be used within the data enclave of your own computer without any data being sent to a remote server.</p></li>
<li><p><strong>Transparency and Flexibility</strong>: accessible to the public, allowing developers to inspect, modify, and distribute the code. This transparency fosters a community-driven development process, leading to rapid innovation and diverse applications.</p></li>
<li><p><strong>Added Features and Community Contributions</strong>: can leverage multiple providers and internal teams for updates and support, which enables users to stay at the forefront of technology and exercise greater control over their usage.</p></li>
<li><p><strong>Customizability</strong>: allow for added features and benefit from community contributions. They are ideal for projects that require customization and those where budget constraints are a primary concern.</p></li>
</ul>
</div>
</div>
<p>There are multiple options to do that. An easy one is to install <a href="https://ollama.com/" target="_blank">OLLAMA</a>, which offers a wide <a href="https://ollama.com/library" target="_blank">variety of open models</a> from the “AI Race” competitors <a href="https://chat.lmsys.org/" target="_blank">arena</a>, for instance: <a href="https://ollama.com/library/llama3" target="_blank">LLama3</a> from Facebook, <a href="https://ollama.com/library/gemma" target="_blank">gemma</a> from Google, <a href="https://ollama.com/library/phi3" target="_blank">phi3</a> from Microsoft, but also <a href="https://ollama.com/library/qwen" target="_blank">qwen</a> from the Chinese company Alibaba, <a href="https://ollama.com/library/falcon" target="_blank">falcon</a> from the Emirati Technology Innovation Institute, or <a href="https://ollama.com/library/mixtral" target="_blank">Mixtral</a> from the French startup Mistral AI. LangChain has a dedicated <a href="https://python.langchain.com/docs/integrations/chat/ollama" target="_blank">module to work with ollama</a>.</p>
<p>Below, we start with <a href="https://mistral.ai/news/mixtral-of-experts/" target="_blank">Mixtral Sparse Mixture-of-Experts</a>, and specifically the quantized version: <a href="https://huggingface.co/TheBloke/Mixtral-8x7B-Instruct-v0.1-GGUF#explanation-of-quantisation-methods" target="_blank">8x7b-instruct-v0.1-q4_K_M</a>, an open-weight model designed to optimize the performance-to-cost ratio, i.e. small enough to run on a strong laptop while still performing well. This downloads a model file of around 26 GB.</p>
<div class="cell" data-execution_count="17">
<div class="sourceCode cell-code" id="cb17"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb17-1"><a href="#cb17-1" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> langchain_community.chat_models <span class="im">import</span> ChatOllama</span>
<span id="cb17-2"><a href="#cb17-2" aria-hidden="true" tabindex="-1"></a>ollama_mixtral <span class="op">=</span> ChatOllama(</span>
<span id="cb17-3"><a href="#cb17-3" aria-hidden="true" tabindex="-1"></a> model<span class="op">=</span><span class="st">"mixtral:8x7b-instruct-v0.1-q4_K_M"</span>, </span>
<span id="cb17-4"><a href="#cb17-4" aria-hidden="true" tabindex="-1"></a> temperature<span class="op">=</span><span class="fl">0.2</span>, </span>
<span id="cb17-5"><a href="#cb17-5" aria-hidden="true" tabindex="-1"></a> request_timeout<span class="op">=</span><span class="dv">500</span></span>
<span id="cb17-6"><a href="#cb17-6" aria-hidden="true" tabindex="-1"></a>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</div>
<p>The temperature sets the <em>creativeness</em> of the response: the higher the value, the more creative the output. Below we will remain conservative! It is the equivalent of the <em>conversation style</em> setting in Copilot: <em>creative [1–0.7], balanced ]0.7–0.4], precise ]0.4–0]</em>…</p>
</section>
<section id="summarisation-prompt" class="level4">
<h4 class="anchored" data-anchor-id="summarisation-prompt">Summarisation Prompt</h4>
<p>A prompt is a piece of text or a set of instructions used by the LLM to generate a response or perform a task. Writing a good summarization prompt involves a few key steps:</p>
<ul>
<li><p>Be <strong>Specific</strong>: Clearly state what you want to summarize. For example, “Summarize this Operation Strategic Plan in 200 words using abstractive summarization” or “Provide a summary of this needs assessment report, highlighting its key takeaways”.</p></li>
<li><p>Define the <strong>Scope</strong>: Specify the length or depth of the summary you need. For instance, “Summarize this text into two paragraphs with simple language to make it easier to understand” or “Create a summary of this report by summarizing all chapters separately and then generating an overall summary of the report”.</p></li>
<li><p>Set the <strong>Context</strong>: If the summary is for a specific purpose or audience, mention it in the prompt. For example, “I need to write talking points based on this report. Help me summarize this text for better understanding so that I can use it as an introduction email” or “Summarize this for me like I’m 8 years old”.</p></li>
<li><p>Use <strong>Clear and Concise</strong> Language: Avoid unnecessary complexity or ambiguity. A good prompt should provide enough direction to start but leave room for creativity.</p></li>
</ul>
<p>Here we will try to create a prompt that generates an “Evaluation Brief” from the larger evaluation report.</p>
<p>Mixtral comes with specific tags to use for the prompt:</p>
<p><code>&lt;s&gt;[INST] Instruction [/INST] Model answer&lt;/s&gt;[INST] Follow-up instruction [/INST]</code></p>
<div class="cell" data-execution_count="18">
<div class="sourceCode cell-code" id="cb18"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb18-1"><a href="#cb18-1" aria-hidden="true" tabindex="-1"></a>RAG_prompt <span class="op">=</span> <span class="st">"""</span></span>
<span id="cb18-2"><a href="#cb18-2" aria-hidden="true" tabindex="-1"></a><span class="st"><s> </span></span>
<span id="cb18-3"><a href="#cb18-3" aria-hidden="true" tabindex="-1"></a><span class="st">[INST]Act if you were a public program evaluation expert working for UNHCR. </span></span>
<span id="cb18-4"><a href="#cb18-4" aria-hidden="true" tabindex="-1"></a><span class="st">Your audience target is composed of Senior Executives that are managing the operation or program that got evaluated.[/INST]</span></span>
<span id="cb18-5"><a href="#cb18-5" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb18-6"><a href="#cb18-6" aria-hidden="true" tabindex="-1"></a><span class="st">Your task is to generate an executive summary of the report you just ingested. </span></span>
<span id="cb18-7"><a href="#cb18-7" aria-hidden="true" tabindex="-1"></a><span class="st"></s></span></span>
<span id="cb18-8"><a href="#cb18-8" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb18-9"><a href="#cb18-9" aria-hidden="true" tabindex="-1"></a><span class="st">[INST]</span></span>
<span id="cb18-10"><a href="#cb18-10" aria-hidden="true" tabindex="-1"></a><span class="st">The summary should follow the following defined structure:</span></span>
<span id="cb18-11"><a href="#cb18-11" aria-hidden="true" tabindex="-1"></a><span class="st"> </span></span>
<span id="cb18-12"><a href="#cb18-12" aria-hidden="true" tabindex="-1"></a><span class="st"> - In the first part titled "What have we learn?", start with a description of the Forcibly Displaced population in the operation and include as 5 bullet points, the main challenges in relation with the evaluation objectives that have been identified in the document. </span></span>
<span id="cb18-13"><a href="#cb18-13" aria-hidden="true" tabindex="-1"></a><span class="st"> For each challenge explain why it's a problem and give a practical example to illustrate the consequence of this problem.</span></span>
<span id="cb18-14"><a href="#cb18-14" aria-hidden="true" tabindex="-1"></a><span class="st"> </span></span>
<span id="cb18-15"><a href="#cb18-15" aria-hidden="true" tabindex="-1"></a><span class="st"> - In a second part titled: "How did we get there?" try to review the common root causes for all the challenges that have been identified. </span></span>
<span id="cb18-16"><a href="#cb18-16" aria-hidden="true" tabindex="-1"></a><span class="st"> </span></span>
<span id="cb18-17"><a href="#cb18-17" aria-hidden="true" tabindex="-1"></a><span class="st"> - In a third part, title: "What is working well?", provide a summary of the main success and achievement, i.e. things that have been identified as good practices and / or effective by the evaluators.</span></span>
<span id="cb18-18"><a href="#cb18-18" aria-hidden="true" tabindex="-1"></a><span class="st"> </span></span>
<span id="cb18-19"><a href="#cb18-19" aria-hidden="true" tabindex="-1"></a><span class="st"> - In the fourth part: "Now What to do?", include and summarize the recommendations proposed by the evaluation. Classify the recommendations according to their relevant level:</span></span>
<span id="cb18-20"><a href="#cb18-20" aria-hidden="true" tabindex="-1"></a><span class="st"> </span></span>
<span id="cb18-21"><a href="#cb18-21" aria-hidden="true" tabindex="-1"></a><span class="st"> 1. "Operational Level": i.e recommendations that need to be implemented in the field as an adaptation or change of current practices. Please flag clearly, if this is the case, the recommendations related to practice that should be stopped or discontinued;</span></span>
<span id="cb18-22"><a href="#cb18-22" aria-hidden="true" tabindex="-1"></a><span class="st"> </span></span>
<span id="cb18-23"><a href="#cb18-23" aria-hidden="true" tabindex="-1"></a><span class="st"> 2. "Organizational level": i.e recommendations that require changes in staffing or capacity building. Please flag clearly, if this is the case, the recommendations related to practice that should be stopped or discontinued;</span></span>
<span id="cb18-24"><a href="#cb18-24" aria-hidden="true" tabindex="-1"></a><span class="st"> </span></span>
<span id="cb18-25"><a href="#cb18-25" aria-hidden="true" tabindex="-1"></a><span class="st"> 3. "Strategic Level": i.e recommendations that require a change in existing policy and rules.</span></span>
<span id="cb18-26"><a href="#cb18-26" aria-hidden="true" tabindex="-1"></a><span class="st"> </span></span>
<span id="cb18-27"><a href="#cb18-27" aria-hidden="true" tabindex="-1"></a><span class="st"> - At the end, for the "Conclusion", craft a reflective conclusion in one sentence that highlights the broader significance of the discussed topic. </span></span>
<span id="cb18-28"><a href="#cb18-28" aria-hidden="true" tabindex="-1"></a><span class="st">[/INST]</span></span>
<span id="cb18-29"><a href="#cb18-29" aria-hidden="true" tabindex="-1"></a><span class="st">"""</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</div>
</section>
<section id="set-up-the-retriever" class="level4">
<h4 class="anchored" data-anchor-id="set-up-the-retriever">Set up the Retriever</h4>
<p>A <a href="https://python.langchain.com/docs/modules/data_connection/retrievers/" target="_blank">retriever</a> acts as an information gatekeeper in the RAG architecture. Its primary function is to search through a large corpus of data to find relevant pieces of information that can be used for text generation. You can think of it as a specialized librarian who knows exactly which ‘books’ to pull off the ‘shelves’ when you ask a question. In other words, the retriever first fetches relevant parts of the document pertaining to the user query, and then the Large Language Model (LLM) uses this information to generate a response.</p>
<p>The search_type argument within <code>vectorstore.as_retriever</code> for LangChain allows you to specify the retrieval strategy used to find relevant documents in your vector store. Different options are available:</p>
<ol type="1">
<li><p>If you simply want the most relevant documents, “<strong>similarity</strong>” (default): This is the most common search type and is used by default. It performs a standard nearest neighbor search based on vector similarity. The retriever searches for documents in the vector store whose vector representations are closest to the query vector. Documents with higher similarity scores are considered more relevant and are returned first.</p></li>
<li><p>If you need diverse results that cover different aspects of a topic, “<strong>mmr</strong>” (Maximum Marginal Relevance): This search type focuses on retrieving documents that are both relevant to the query and diverse from each other. It aims to avoid redundancy in the results. MMR is particularly useful when you want a collection of documents that cover different aspects of a topic, rather than just multiple copies of the most similar document.</p></li>
<li><p>If you want to ensure a minimum level of relevance, “<strong>similarity_score_threshold</strong>”: This search type retrieves documents based on a similarity score threshold. It only returns documents that have a similarity score above the specified threshold. This allows you to filter out documents with low relevance to the query.</p></li>
</ol>
<p>The retriever also takes a series of potential parameters. The <code>search_kwargs={"k": 2,"score_threshold":0.8}</code> argument is a dictionary used to configure how documents are retrieved during the search process. This argument lets you control how many results you get (up to two in this case) and how good those results need to be (with a score of at least 0.8):</p>
<ul>
<li><p><strong>k</strong> (int): This parameter controls the number of documents to retrieve from the search. In this case, k: 2 specifies that the retriever should return up to two documents that match the search query.</p></li>
<li><p><strong>score_threshold</strong> (float): This parameter sets a minimum score threshold for retrieved documents. Documents with a score lower than 0.8 will be excluded from the results. This essentially acts as a quality filter, ensuring a certain level of relevance between the query and retrieved documents.</p></li>
</ul>
<p>The scoring mechanism used by the retriever might depend on the specific retriever implementation. It’s likely based on how well the retrieved documents match the search query. The effectiveness of these parameters depends on your specific use case and the quality of the underlying retrieval system.</p>
<p>Even with “similarity”, the retrieved documents might have varying degrees of relevance. Consider using ranking techniques within LangChain to further refine the results based on additional criteria. The underlying vector store might have limitations on the supported search types. Always refer to the documentation of your specific vector store to confirm available options.</p>
<p>We can build multiple retrievers out of the same <code>vectorstore</code>:</p>
<div class="cell" data-execution_count="19">
<div class="sourceCode cell-code" id="cb19"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb19-1"><a href="#cb19-1" aria-hidden="true" tabindex="-1"></a>ragRetriever_text_bert <span class="op">=</span> vectorstore_text_bert.as_retriever()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</div>
<div class="cell" data-execution_count="20">
<div class="sourceCode cell-code" id="cb20"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb20-1"><a href="#cb20-1" aria-hidden="true" tabindex="-1"></a>ragRetriever_recursivecharactertext_bert <span class="op">=</span> vectorstore_recursivecharactertext_bert.as_retriever()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</div>
<div class="cell" data-execution_count="21">
<div class="sourceCode cell-code" id="cb21"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb21-1"><a href="#cb21-1" aria-hidden="true" tabindex="-1"></a>ragRetriever_similarity_tokentext_bert <span class="op">=</span> vectorstore_tokentext_bert.as_retriever(</span>
<span id="cb21-2"><a href="#cb21-2" aria-hidden="true" tabindex="-1"></a> search_type<span class="op">=</span><span class="st">"similarity_score_threshold"</span>,</span>
<span id="cb21-3"><a href="#cb21-3" aria-hidden="true" tabindex="-1"></a> search_kwargs<span class="op">=</span>{</span>
<span id="cb21-4"><a href="#cb21-4" aria-hidden="true" tabindex="-1"></a> <span class="st">"k"</span>: <span class="dv">3</span>,</span>
<span id="cb21-5"><a href="#cb21-5" aria-hidden="true" tabindex="-1"></a> <span class="st">"score_threshold"</span>: <span class="fl">0.4</span>,</span>
<span id="cb21-6"><a href="#cb21-6" aria-hidden="true" tabindex="-1"></a> },</span>
<span id="cb21-7"><a href="#cb21-7" aria-hidden="true" tabindex="-1"></a>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</div>
<div class="cell" data-execution_count="22">
<div class="sourceCode cell-code" id="cb22"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb22-1"><a href="#cb22-1" aria-hidden="true" tabindex="-1"></a>ragRetriever_similarity_nltktext_bert <span class="op">=</span> vectorstore_nltktext_bert.as_retriever(</span>
<span id="cb22-2"><a href="#cb22-2" aria-hidden="true" tabindex="-1"></a> search_type<span class="op">=</span><span class="st">"similarity_score_threshold"</span>,</span>
<span id="cb22-3"><a href="#cb22-3" aria-hidden="true" tabindex="-1"></a> search_kwargs<span class="op">=</span>{</span>
<span id="cb22-4"><a href="#cb22-4" aria-hidden="true" tabindex="-1"></a> <span class="st">"k"</span>: <span class="dv">5</span>,</span>
<span id="cb22-5"><a href="#cb22-5" aria-hidden="true" tabindex="-1"></a> <span class="st">"score_threshold"</span>: <span class="fl">0.8</span>,</span>
<span id="cb22-6"><a href="#cb22-6" aria-hidden="true" tabindex="-1"></a> },</span>
<span id="cb22-7"><a href="#cb22-7" aria-hidden="true" tabindex="-1"></a>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</div>
</section>
<section id="build-the-chain" class="level4">
<h4 class="anchored" data-anchor-id="build-the-chain">Build the Chain</h4>
<p>A retrieval question-answer chain acts as a pipe: it takes an incoming question, looks up relevant documents using a retriever, then passes those documents along with the original question into an LLM and returns the answer to the original question.</p>
<div class="cell" data-execution_count="23">
<div class="sourceCode cell-code" id="cb23"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb23-1"><a href="#cb23-1" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> langchain_core.prompts <span class="im">import</span> ChatPromptTemplate</span>
<span id="cb23-2"><a href="#cb23-2" aria-hidden="true" tabindex="-1"></a>prompt_retrieval <span class="op">=</span> ChatPromptTemplate.from_template(</span>
<span id="cb23-3"><a href="#cb23-3" aria-hidden="true" tabindex="-1"></a><span class="st">"""Answer the following question based only on the provided context:</span></span>
<span id="cb23-4"><a href="#cb23-4" aria-hidden="true" tabindex="-1"></a><span class="st"><context></span></span>
<span id="cb23-5"><a href="#cb23-5" aria-hidden="true" tabindex="-1"></a><span class="sc">{context}</span></span>
<span id="cb23-6"><a href="#cb23-6" aria-hidden="true" tabindex="-1"></a><span class="st"></context></span></span>
<span id="cb23-7"><a href="#cb23-7" aria-hidden="true" tabindex="-1"></a><span class="st">Question: </span><span class="sc">{input}</span><span class="st">"""</span></span>
<span id="cb23-8"><a href="#cb23-8" aria-hidden="true" tabindex="-1"></a>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</div>
<p>and last the retrieval chain!</p>
<div class="cell" data-execution_count="24">
<div class="sourceCode cell-code" id="cb24"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb24-1"><a href="#cb24-1" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> langchain.chains.combine_documents <span class="im">import</span> create_stuff_documents_chain</span>
<span id="cb24-2"><a href="#cb24-2" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> langchain.chains <span class="im">import</span> create_retrieval_chain</span>
<span id="cb24-3"><a href="#cb24-3" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb24-4"><a href="#cb24-4" aria-hidden="true" tabindex="-1"></a>combine_docs_chain_mixtral <span class="op">=</span> create_stuff_documents_chain(</span>
<span id="cb24-5"><a href="#cb24-5" aria-hidden="true" tabindex="-1"></a> ollama_mixtral ,</span>
<span id="cb24-6"><a href="#cb24-6" aria-hidden="true" tabindex="-1"></a> prompt_retrieval</span>
<span id="cb24-7"><a href="#cb24-7" aria-hidden="true" tabindex="-1"></a>)</span>
<span id="cb24-8"><a href="#cb24-8" aria-hidden="true" tabindex="-1"></a>qa_chain_recursivecharactertext_bert <span class="op">=</span> create_retrieval_chain(</span>
<span id="cb24-9"><a href="#cb24-9" aria-hidden="true" tabindex="-1"></a> ragRetriever_recursivecharactertext_bert, </span>
<span id="cb24-10"><a href="#cb24-10" aria-hidden="true" tabindex="-1"></a> combine_docs_chain_mixtral</span>
<span id="cb24-11"><a href="#cb24-11" aria-hidden="true" tabindex="-1"></a>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</div>
<p>Note that from this stage on, the following steps may take time to run. This will be highly dependent on the power of your computer: the availability of a GPU (Graphics Processing Unit) will significantly increase the speed! FYI, this notebook was built on a Thinkpad P53 with a Quadro T1000 GPU.</p>
<div class="cell" data-execution_count="25">
<div class="sourceCode cell-code" id="cb25"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb25-1"><a href="#cb25-1" aria-hidden="true" tabindex="-1"></a>response_recursivecharactertext_bert <span class="op">=</span> qa_chain_recursivecharactertext_bert.invoke({<span class="st">"input"</span>: RAG_prompt}) </span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</div>
</section>
<section id="save-in-a-word-document" class="level4">
<h4 class="anchored" data-anchor-id="save-in-a-word-document">Save in a word document</h4>
<p>To complete the process, let’s save the result directly in a <a href="https://github.com/Edouard-Legoupil/rag_extraction/raw/main/generated/Evaluation_Brief_response_recursivecharactertext_bert.docx" target="_blank">Word document</a>!</p>
<p>This can be automated with a custom function, <code>create_word_doc</code>, that converts the text output from the LLM, which uses the <a href="https://www.markdownguide.org/getting-started/" target="_blank">standard Markdown format</a>, into the equivalent formatting in Word:</p>
<div class="cell" data-execution_count="26">
<div class="sourceCode cell-code" id="cb26"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb26-1"><a href="#cb26-1" aria-hidden="true" tabindex="-1"></a><span class="im">import</span> docx</span>
<span id="cb26-2"><a href="#cb26-2" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> markdown <span class="im">import</span> markdown</span>
<span id="cb26-3"><a href="#cb26-3" aria-hidden="true" tabindex="-1"></a><span class="im">import</span> re</span>
<span id="cb26-4"><a href="#cb26-4" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb26-5"><a href="#cb26-5" aria-hidden="true" tabindex="-1"></a><span class="kw">def</span> create_word_doc(text, file_name):</span>
<span id="cb26-6"><a href="#cb26-6" aria-hidden="true" tabindex="-1"></a> <span class="co"># Create a document</span></span>
<span id="cb26-7"><a href="#cb26-7" aria-hidden="true" tabindex="-1"></a> doc <span class="op">=</span> docx.Document()</span>
<span id="cb26-8"><a href="#cb26-8" aria-hidden="true" tabindex="-1"></a> <span class="co"># add a heading of level 0 (largest heading)</span></span>
<span id="cb26-9"><a href="#cb26-9" aria-hidden="true" tabindex="-1"></a> doc.add_heading(<span class="st">'Evaluation Brief'</span>, <span class="dv">0</span>) </span>
<span id="cb26-10"><a href="#cb26-10" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb26-11"><a href="#cb26-11" aria-hidden="true" tabindex="-1"></a> <span class="co"># Split the text into lines</span></span>
<span id="cb26-12"><a href="#cb26-12" aria-hidden="true" tabindex="-1"></a> lines <span class="op">=</span> text.split(<span class="st">'</span><span class="ch">\n</span><span class="st">'</span>)</span>
<span id="cb26-13"><a href="#cb26-13" aria-hidden="true" tabindex="-1"></a> <span class="co"># Create a set to store bolded and italic strings</span></span>
<span id="cb26-14"><a href="#cb26-14" aria-hidden="true" tabindex="-1"></a> bolded_and_italic <span class="op">=</span> <span class="bu">set</span>()</span>
<span id="cb26-15"><a href="#cb26-15" aria-hidden="true" tabindex="-1"></a> <span class="cf">for</span> line <span class="kw">in</span> lines:</span>
<span id="cb26-16"><a href="#cb26-16" aria-hidden="true" tabindex="-1"></a> <span class="co"># Check if the line is a heading</span></span>
<span id="cb26-17"><a href="#cb26-17" aria-hidden="true" tabindex="-1"></a> <span class="cf">if</span> line.startswith(<span class="st">'#'</span>):</span>
<span id="cb26-18"><a href="#cb26-18" aria-hidden="true" tabindex="-1"></a> level <span class="op">=</span> line.count(<span class="st">'#'</span>)</span>
<span id="cb26-19"><a href="#cb26-19" aria-hidden="true" tabindex="-1"></a> doc.add_heading(line[level:].strip(), level)</span>
<span id="cb26-20"><a href="#cb26-20" aria-hidden="true" tabindex="-1"></a> <span class="cf">else</span>:</span>
<span id="cb26-21"><a href="#cb26-21" aria-hidden="true" tabindex="-1"></a> <span class="co"># Check if the line contains markdown syntax for bold or italic</span></span>
<span id="cb26-22"><a href="#cb26-22" aria-hidden="true" tabindex="-1"></a> <span class="cf">if</span> <span class="st">'**'</span> <span class="kw">in</span> line <span class="kw">or</span> <span class="st">'*'</span> <span class="kw">in</span> line:</span>
<span id="cb26-23"><a href="#cb26-23" aria-hidden="true" tabindex="-1"></a> <span class="co"># Split the line into parts</span></span>
<span id="cb26-24"><a href="#cb26-24" aria-hidden="true" tabindex="-1"></a> parts <span class="op">=</span> re.split(<span class="vs">r'(\*{1,2}(.*?)\*{1,2})'</span>, line)</span>
<span id="cb26-25"><a href="#cb26-25" aria-hidden="true" tabindex="-1"></a> <span class="co"># Add another paragraph</span></span>
<span id="cb26-26"><a href="#cb26-26" aria-hidden="true" tabindex="-1"></a> p <span class="op">=</span> doc.add_paragraph()</span>
<span id="cb26-27"><a href="#cb26-27" aria-hidden="true" tabindex="-1"></a> <span class="cf">for</span> i, part <span class="kw">in</span> <span class="bu">enumerate</span>(parts):</span>
<span id="cb26-28"><a href="#cb26-28" aria-hidden="true" tabindex="-1"></a> <span class="co"># Remove the markdown syntax</span></span>
<span id="cb26-29"><a href="#cb26-29" aria-hidden="true" tabindex="-1"></a> content <span class="op">=</span> part.strip(<span class="st">'*'</span>)</span>
<span id="cb26-30"><a href="#cb26-30" aria-hidden="true" tabindex="-1"></a> <span class="co"># Check if the content has been added before</span></span>
<span id="cb26-31"><a href="#cb26-31" aria-hidden="true" tabindex="-1"></a> <span class="cf">if</span> content <span class="kw">not</span> <span class="kw">in</span> bolded_and_italic:</span>
<span id="cb26-32"><a href="#cb26-32" aria-hidden="true" tabindex="-1"></a> <span class="co"># Add a run with the part and format it</span></span>
<span id="cb26-33"><a href="#cb26-33" aria-hidden="true" tabindex="-1"></a> run <span class="op">=</span> p.add_run(content)</span>
<span id="cb26-34"><a href="#cb26-34" aria-hidden="true" tabindex="-1"></a> run.font.name <span class="op">=</span> <span class="st">'Arial'</span></span>
<span id="cb26-35"><a href="#cb26-35" aria-hidden="true" tabindex="-1"></a> run.font.size <span class="op">=</span> docx.shared.Pt(<span class="dv">12</span>)</span>
<span id="cb26-36"><a href="#cb26-36" aria-hidden="true" tabindex="-1"></a> <span class="co"># If the part was surrounded by **, make it bold</span></span>
<span id="cb26-37"><a href="#cb26-37" aria-hidden="true" tabindex="-1"></a> <span class="cf">if</span> <span class="st">'**'</span> <span class="kw">in</span> part:</span>
<span id="cb26-38"><a href="#cb26-38" aria-hidden="true" tabindex="-1"></a> run.bold <span class="op">=</span> <span class="va">True</span></span>
<span id="cb26-39"><a href="#cb26-39" aria-hidden="true" tabindex="-1"></a> <span class="co"># If the part was surrounded by *, make it italic</span></span>
<span id="cb26-40"><a href="#cb26-40" aria-hidden="true" tabindex="-1"></a> <span class="cf">elif</span> <span class="st">'*'</span> <span class="kw">in</span> part:</span>
<span id="cb26-41"><a href="#cb26-41" aria-hidden="true" tabindex="-1"></a> run.italic <span class="op">=</span> <span class="va">True</span></span>
<span id="cb26-42"><a href="#cb26-42" aria-hidden="true" tabindex="-1"></a> <span class="co"># Add the content to the set</span></span>
<span id="cb26-43"><a href="#cb26-43" aria-hidden="true" tabindex="-1"></a> bolded_and_italic.add(content)</span>
<span id="cb26-44"><a href="#cb26-44" aria-hidden="true" tabindex="-1"></a> <span class="cf">else</span>:</span>
<span id="cb26-45"><a href="#cb26-45" aria-hidden="true" tabindex="-1"></a> <span class="co"># Add another paragraph</span></span>
<span id="cb26-46"><a href="#cb26-46" aria-hidden="true" tabindex="-1"></a> p <span class="op">=</span> doc.add_paragraph()</span>
<span id="cb26-47"><a href="#cb26-47" aria-hidden="true" tabindex="-1"></a> <span class="co"># Add a run with the line and format it</span></span>
<span id="cb26-48"><a href="#cb26-48" aria-hidden="true" tabindex="-1"></a> run <span class="op">=</span> p.add_run(line)</span>
<span id="cb26-49"><a href="#cb26-49" aria-hidden="true" tabindex="-1"></a> run.font.name <span class="op">=</span> <span class="st">'Arial'</span></span>
<span id="cb26-50"><a href="#cb26-50" aria-hidden="true" tabindex="-1"></a> run.font.size <span class="op">=</span> docx.shared.Pt(<span class="dv">12</span>)</span>
<span id="cb26-51"><a href="#cb26-51" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb26-52"><a href="#cb26-52" aria-hidden="true" tabindex="-1"></a> <span class="co">## Add a disclaimer... ----------------</span></span>
<span id="cb26-53"><a href="#cb26-53" aria-hidden="true" tabindex="-1"></a> <span class="co"># add a page break to start a new page</span></span>
<span id="cb26-54"><a href="#cb26-54" aria-hidden="true" tabindex="-1"></a> doc.add_page_break()</span>
<span id="cb26-55"><a href="#cb26-55" aria-hidden="true" tabindex="-1"></a> <span class="co"># add a heading of level 2</span></span>
<span id="cb26-56"><a href="#cb26-56" aria-hidden="true" tabindex="-1"></a> doc.add_heading(<span class="st">'DISCLAIMER:'</span>, <span class="dv">2</span>)</span>
<span id="cb26-57"><a href="#cb26-57" aria-hidden="true" tabindex="-1"></a> doc_para <span class="op">=</span> doc.add_paragraph() </span>
<span id="cb26-58"><a href="#cb26-58" aria-hidden="true" tabindex="-1"></a> doc_para.add_run(<span class="st">'This document contains material generated by artificial intelligence technology. While efforts have been made to ensure accuracy, please be aware that AI-generated content may not always fully represent the intent or expertise of human-authored material and may contain errors or inaccuracies. An AI model might generate content that sounds plausible but that is either factually incorrect or unrelated to the given context. These unexpected outcomes, also called AI hallucinations, can stem from biases, under-performing information retrieval, lack of real-world understanding, or limitations in training data.'</span>).italic <span class="op">=</span> <span class="va">True</span></span>
<span id="cb26-59"><a href="#cb26-59" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb26-60"><a href="#cb26-60" aria-hidden="true" tabindex="-1"></a> <span class="co"># Save the document ---------------</span></span>
<span id="cb26-61"><a href="#cb26-61" aria-hidden="true" tabindex="-1"></a> doc.save(file_name)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</div>
<p>Now we can simply use this function to get a Word document from the LLM answer!</p>
<div class="cell" data-execution_count="27">
<div class="sourceCode cell-code" id="cb27"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb27-1"><a href="#cb27-1" aria-hidden="true" tabindex="-1"></a>create_word_doc(</span>
<span id="cb27-2"><a href="#cb27-2" aria-hidden="true" tabindex="-1"></a> response_recursivecharactertext_bert[<span class="st">"answer"</span>], </span>
<span id="cb27-3"><a href="#cb27-3" aria-hidden="true" tabindex="-1"></a> <span class="st">"generated/Evaluation_Brief_response_recursivecharactertext_bert.docx"</span></span>
<span id="cb27-4"><a href="#cb27-4" aria-hidden="true" tabindex="-1"></a>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</div>
</section>
</section>
</section>
<section id="continuous-evaluation-process" class="level2">
<h2 class="anchored" data-anchor-id="continuous-evaluation-process">Continuous Evaluation Process</h2>
<p>We were able to get a first brief… Still, how can we assess how good this report is? We will first test different settings to create the brief. Then we will create a dataset reflecting those settings and evaluate it!</p>
<section id="building-alternative-briefs" class="level3">
<h3 class="anchored" data-anchor-id="building-alternative-briefs">Building Alternative Briefs</h3>
<p>Let’s try to generate more reports using different settings.</p>
<p>LangChain often integrates with libraries like <a href="https://huggingface.co/sentence-transformers" target="_blank">Hugging Face Transformers</a> for embedding usage. It is best to experiment with different embeddings to see what works best for a specific use case and dataset. There are also plenty of options depending on the language.</p>
<p>Let’s first try a second embedding model… <a href="https://huggingface.co/models?pipeline_tag=feature-extraction&amp;sort=trending" target="_blank">Hugging Face</a> has many options… and there is even a <a href="https://huggingface.co/spaces/mteb/leaderboard" target="_blank">leaderboard</a> to see how they compete… Here we will select the embedding model <a href="https://huggingface.co/BAAI/bge-large-en-v1.5" target="_blank">bge-large-en-v1.5</a>, a model of over 200MB from the Beijing Academy of Artificial Intelligence. It remains relatively small in size but is efficient and does not consume too much memory.</p>
<div class="cell" data-execution_count="28">
<div class="sourceCode cell-code" id="cb28"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb28-1"><a href="#cb28-1" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> langchain_community.embeddings <span class="im">import</span> HuggingFaceBgeEmbeddings</span>
<span id="cb28-2"><a href="#cb28-2" aria-hidden="true" tabindex="-1"></a>embeddings_bge<span class="op">=</span> HuggingFaceBgeEmbeddings(</span>
<span id="cb28-3"><a href="#cb28-3" aria-hidden="true" tabindex="-1"></a> model_name<span class="op">=</span><span class="st">"BAAI/bge-small-en"</span>,</span>
<span id="cb28-4"><a href="#cb28-4" aria-hidden="true" tabindex="-1"></a> model_kwargs<span class="op">=</span>{<span class="st">"device"</span>: <span class="st">"cpu"</span>}, </span>
<span id="cb28-5"><a href="#cb28-5" aria-hidden="true" tabindex="-1"></a> encode_kwargs<span class="op">=</span>{<span class="st">"normalize_embeddings"</span>: <span class="va">True</span>}</span>
<span id="cb28-6"><a href="#cb28-6" aria-hidden="true" tabindex="-1"></a>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</div>
<p>We build the vector store using the new embedding…</p>
<div class="cell" data-execution_count="29">
<div class="sourceCode cell-code" id="cb29"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb29-1"><a href="#cb29-1" aria-hidden="true" tabindex="-1"></a><span class="co"># Disable TOKENIZERS warning</span></span>
<span id="cb29-2"><a href="#cb29-2" aria-hidden="true" tabindex="-1"></a><span class="im">import</span> os</span>
<span id="cb29-3"><a href="#cb29-3" aria-hidden="true" tabindex="-1"></a>os.environ[<span class="st">"TOKENIZERS_PARALLELISM"</span>] <span class="op">=</span> <span class="st">"false"</span></span>
<span id="cb29-4"><a href="#cb29-4" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb29-5"><a href="#cb29-5" aria-hidden="true" tabindex="-1"></a>vectorstore_recursivecharactertext_bge <span class="op">=</span> Chroma.from_documents(</span>
<span id="cb29-6"><a href="#cb29-6" aria-hidden="true" tabindex="-1"></a> chunks_recursivecharactertext,</span>
<span id="cb29-7"><a href="#cb29-7" aria-hidden="true" tabindex="-1"></a> embeddings_bge,</span>
<span id="cb29-8"><a href="#cb29-8" aria-hidden="true" tabindex="-1"></a> collection_name<span class="op">=</span> <span class="st">"recursivecharactertext_bge"</span>,</span>
<span id="cb29-9"><a href="#cb29-9" aria-hidden="true" tabindex="-1"></a> persist_directory <span class="op">=</span> <span class="st">"persist"</span> </span>
<span id="cb29-10"><a href="#cb29-10" aria-hidden="true" tabindex="-1"></a>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</div>
<p>We can set a different retriever now using Maximum Marginal Relevance…</p>
<div class="cell" data-execution_count="30">
<div class="sourceCode cell-code" id="cb30"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb30-1"><a href="#cb30-1" aria-hidden="true" tabindex="-1"></a>ragRetriever_mmr_recursivecharactertext_bge <span class="op">=</span> vectorstore_recursivecharactertext_bge.as_retriever(</span>
<span id="cb30-2"><a href="#cb30-2" aria-hidden="true" tabindex="-1"></a> search_type<span class="op">=</span><span class="st">"mmr"</span></span>
<span id="cb30-3"><a href="#cb30-3" aria-hidden="true" tabindex="-1"></a>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</div>
<p><a href="https://js.langchain.com/v0.1/docs/modules/data_connection/retrievers/#advanced-retrieval-types" target="_blank">Advanced retrieval strategies</a> can also be used to improve the process. For instance, we can test:</p>
<ul>
<li>using <strong>ParentDocumentRetriever</strong>, a document is embedded as small chunks (child documents) so that the context that “surrounds” the found chunks can be retrieved: child documents are retrieved using Dense Vector Retrieval and then merged based on their parents. If they have the same parent, they are merged; the child documents are then replaced with their respective parent documents from an in-memory store, and the parent documents are used to augment generation.</li>
</ul>
<div class="cell" data-execution_count="31">
<div class="sourceCode cell-code" id="cb31"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb31-1"><a href="#cb31-1" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> langchain.retrievers <span class="im">import</span> ParentDocumentRetriever</span>
<span id="cb31-2"><a href="#cb31-2" aria-hidden="true" tabindex="-1"></a>parent_splitter <span class="op">=</span> RecursiveCharacterTextSplitter(chunk_size<span class="op">=</span><span class="dv">1536</span>)</span>
<span id="cb31-3"><a href="#cb31-3" aria-hidden="true" tabindex="-1"></a>child_splitter <span class="op">=</span> RecursiveCharacterTextSplitter(chunk_size<span class="op">=</span><span class="dv">256</span>)</span>
<span id="cb31-4"><a href="#cb31-4" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb31-5"><a href="#cb31-5" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> langchain.storage <span class="im">import</span> InMemoryStore</span>
<span id="cb31-6"><a href="#cb31-6" aria-hidden="true" tabindex="-1"></a>store <span class="op">=</span> InMemoryStore()</span>
<span id="cb31-7"><a href="#cb31-7" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb31-8"><a href="#cb31-8" aria-hidden="true" tabindex="-1"></a>ragRetriever_parent_recursivecharactertext_bge <span class="op">=</span> ParentDocumentRetriever(</span>
<span id="cb31-9"><a href="#cb31-9" aria-hidden="true" tabindex="-1"></a> vectorstore<span class="op">=</span> vectorstore_recursivecharactertext_bge,</span>
<span id="cb31-10"><a href="#cb31-10" aria-hidden="true" tabindex="-1"></a> docstore<span class="op">=</span>store,</span>
<span id="cb31-11"><a href="#cb31-11" aria-hidden="true" tabindex="-1"></a> child_splitter<span class="op">=</span>child_splitter,</span>
<span id="cb31-12"><a href="#cb31-12" aria-hidden="true" tabindex="-1"></a> parent_splitter<span class="op">=</span>parent_splitter,</span>
<span id="cb31-13"><a href="#cb31-13" aria-hidden="true" tabindex="-1"></a>)</span>
<span id="cb31-14"><a href="#cb31-14" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb31-15"><a href="#cb31-15" aria-hidden="true" tabindex="-1"></a>ragRetriever_parent_recursivecharactertext_bge.add_documents(docs)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</div>
<ul>
<li><strong>Ensemble retrieval</strong> is another technique where a retriever pair is created, with a sparse retriever (like <a href="https://en.wikipedia.org/wiki/Okapi_BM25" target="_blank">Okapi BM25</a>) on one side and a dense retriever (like the embedding similarity we saw before) on the other. The retrieved results are then “fused” according to their weights, using the Reciprocal Rank Fusion algorithm, into a single ranked list, and the resulting documents are used to augment the generation. This same approach has also been tested by the <a href="https://blogs.worldbank.org/en/opendata/beyond-keywords--ai-driven-approaches-to-improve-data-discoverab0">World Bank</a>.</li>
</ul>
<div class="cell" data-execution_count="32">
<div class="sourceCode cell-code" id="cb32"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb32-1"><a href="#cb32-1" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> langchain.retrievers <span class="im">import</span> BM25Retriever</span>
<span id="cb32-2"><a href="#cb32-2" aria-hidden="true" tabindex="-1"></a>retriever_bm25 <span class="op">=</span> BM25Retriever.from_documents(chunks_recursivecharactertext)</span>
<span id="cb32-3"><a href="#cb32-3" aria-hidden="true" tabindex="-1"></a>retriever_bm25.k <span class="op">=</span> <span class="dv">3</span></span>
<span id="cb32-4"><a href="#cb32-4" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb32-5"><a href="#cb32-5" aria-hidden="true" tabindex="-1"></a>retriever_similarity <span class="op">=</span> vectorstore_recursivecharactertext_bge.as_retriever(search_kwargs<span class="op">=</span>{<span class="st">"k"</span>: <span class="dv">3</span>})</span>
<span id="cb32-6"><a href="#cb32-6" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb32-7"><a href="#cb32-7" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> langchain.retrievers <span class="im">import</span> EnsembleRetriever</span>
<span id="cb32-8"><a href="#cb32-8" aria-hidden="true" tabindex="-1"></a>ragRetriever_ensemble_recursivecharactertext_bge <span class="op">=</span> EnsembleRetriever(</span>
<span id="cb32-9"><a href="#cb32-9" aria-hidden="true" tabindex="-1"></a> retrievers<span class="op">=</span>[retriever_bm25, retriever_similarity], </span>
<span id="cb32-10"><a href="#cb32-10" aria-hidden="true" tabindex="-1"></a> <span class="co"># Relative weighting of each retriever needs to sums to 1!</span></span>
<span id="cb32-11"><a href="#cb32-11" aria-hidden="true" tabindex="-1"></a> weights<span class="op">=</span>[<span class="fl">0.42</span>, <span class="fl">0.58</span>]</span>
<span id="cb32-12"><a href="#cb32-12" aria-hidden="true" tabindex="-1"></a> )</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</div>
<p>We can also use a different model for the LLM: <a href="https://cohere.com/blog/command-r" target="_blank">command-r</a> from the start-up Cohere, and specifically its quantized version, <a href="https://ollama.com/library/command-r:35b-v0.1-q4_K_M" target="_blank">command-r:35b-v0.1-q4_K_M</a>, an open-weight model designed to optimize RAG. We then set up the corresponding chain.</p>
<div class="cell" data-execution_count="33">
<div class="sourceCode cell-code" id="cb33"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb33-1"><a href="#cb33-1" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> langchain_community.chat_models <span class="im">import</span> ChatOllama</span>
<span id="cb33-2"><a href="#cb33-2" aria-hidden="true" tabindex="-1"></a>ollama_commandR <span class="op">=</span> ChatOllama(</span>
<span id="cb33-3"><a href="#cb33-3" aria-hidden="true" tabindex="-1"></a> model<span class="op">=</span><span class="st">"command-r:35b-v0.1-q4_K_M"</span>, </span>
<span id="cb33-4"><a href="#cb33-4" aria-hidden="true" tabindex="-1"></a> temperature<span class="op">=</span><span class="fl">0.2</span>, </span>
<span id="cb33-5"><a href="#cb33-5" aria-hidden="true" tabindex="-1"></a> request_timeout<span class="op">=</span><span class="dv">500</span></span>
<span id="cb33-6"><a href="#cb33-6" aria-hidden="true" tabindex="-1"></a>)</span>
<span id="cb33-7"><a href="#cb33-7" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb33-8"><a href="#cb33-8" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> langchain.chains.combine_documents <span class="im">import</span> create_stuff_documents_chain</span>
<span id="cb33-9"><a href="#cb33-9" aria-hidden="true" tabindex="-1"></a>combine_docs_chain_commandR <span class="op">=</span> create_stuff_documents_chain(</span>
<span id="cb33-10"><a href="#cb33-10" aria-hidden="true" tabindex="-1"></a> ollama_commandR ,</span>
<span id="cb33-11"><a href="#cb33-11" aria-hidden="true" tabindex="-1"></a> prompt_retrieval</span>
<span id="cb33-12"><a href="#cb33-12" aria-hidden="true" tabindex="-1"></a>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</div>
<p>Finally we generate our alternative summaries!</p>
<div class="cell" data-execution_count="34">
<div class="sourceCode cell-code" id="cb34"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb34-1"><a href="#cb34-1" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> langchain.chains <span class="im">import</span> create_retrieval_chain</span>
<span id="cb34-2"><a href="#cb34-2" aria-hidden="true" tabindex="-1"></a>qa_chain_mmr_recursivecharactertext_bge <span class="op">=</span> create_retrieval_chain(</span>
<span id="cb34-3"><a href="#cb34-3" aria-hidden="true" tabindex="-1"></a> ragRetriever_mmr_recursivecharactertext_bge,</span>
<span id="cb34-4"><a href="#cb34-4" aria-hidden="true" tabindex="-1"></a> combine_docs_chain_commandR</span>
<span id="cb34-5"><a href="#cb34-5" aria-hidden="true" tabindex="-1"></a>)</span>
<span id="cb34-6"><a href="#cb34-6" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb34-7"><a href="#cb34-7" aria-hidden="true" tabindex="-1"></a>response_mmr_recursivecharactertext_bge <span class="op">=</span> qa_chain_mmr_recursivecharactertext_bge.invoke({<span class="st">"input"</span>: RAG_prompt}) </span>
<span id="cb34-8"><a href="#cb34-8" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb34-9"><a href="#cb34-9" aria-hidden="true" tabindex="-1"></a>create_word_doc(</span>
<span id="cb34-10"><a href="#cb34-10" aria-hidden="true" tabindex="-1"></a> response_mmr_recursivecharactertext_bge[<span class="st">"answer"</span>], </span>
<span id="cb34-11"><a href="#cb34-11" aria-hidden="true" tabindex="-1"></a> <span class="st">"generated/Evaluation_Brief_response_mmr_recursivecharactertext_bge.docx"</span></span>
<span id="cb34-12"><a href="#cb34-12" aria-hidden="true" tabindex="-1"></a>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</div>
<div class="cell" data-execution_count="35">
<div class="sourceCode cell-code" id="cb35"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb35-1"><a href="#cb35-1" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> langchain.chains <span class="im">import</span> create_retrieval_chain</span>
<span id="cb35-2"><a href="#cb35-2" aria-hidden="true" tabindex="-1"></a>qa_chain_parent_recursivecharactertext_bge <span class="op">=</span> create_retrieval_chain(</span>
<span id="cb35-3"><a href="#cb35-3" aria-hidden="true" tabindex="-1"></a> ragRetriever_parent_recursivecharactertext_bge,</span>
<span id="cb35-4"><a href="#cb35-4" aria-hidden="true" tabindex="-1"></a> combine_docs_chain_commandR</span>
<span id="cb35-5"><a href="#cb35-5" aria-hidden="true" tabindex="-1"></a>)</span>
<span id="cb35-6"><a href="#cb35-6" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb35-7"><a href="#cb35-7" aria-hidden="true" tabindex="-1"></a>response_parent_recursivecharactertext_bge <span class="op">=</span> qa_chain_parent_recursivecharactertext_bge.invoke({<span class="st">"input"</span>: RAG_prompt}) </span>
<span id="cb35-8"><a href="#cb35-8" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb35-9"><a href="#cb35-9" aria-hidden="true" tabindex="-1"></a>create_word_doc(</span>
<span id="cb35-10"><a href="#cb35-10" aria-hidden="true" tabindex="-1"></a> response_parent_recursivecharactertext_bge[<span class="st">"answer"</span>], </span>
<span id="cb35-11"><a href="#cb35-11" aria-hidden="true" tabindex="-1"></a> <span class="st">"generated/Evaluation_Brief_response_parent_recursivecharactertext_bge.docx"</span></span>
<span id="cb35-12"><a href="#cb35-12" aria-hidden="true" tabindex="-1"></a>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</div>
<div class="cell" data-execution_count="36">
<div class="sourceCode cell-code" id="cb36"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb36-1"><a href="#cb36-1" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> langchain.chains <span class="im">import</span> create_retrieval_chain</span>
<span id="cb36-2"><a href="#cb36-2" aria-hidden="true" tabindex="-1"></a>qa_chain_ensemble_recursivecharactertext_bge <span class="op">=</span> create_retrieval_chain(</span>
<span id="cb36-3"><a href="#cb36-3" aria-hidden="true" tabindex="-1"></a> ragRetriever_ensemble_recursivecharactertext_bge,</span>
<span id="cb36-4"><a href="#cb36-4" aria-hidden="true" tabindex="-1"></a> combine_docs_chain_commandR</span>
<span id="cb36-5"><a href="#cb36-5" aria-hidden="true" tabindex="-1"></a>)</span>
<span id="cb36-6"><a href="#cb36-6" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb36-7"><a href="#cb36-7" aria-hidden="true" tabindex="-1"></a>response_ensemble_recursivecharactertext_bge <span class="op">=</span> qa_chain_ensemble_recursivecharactertext_bge.invoke({<span class="st">"input"</span>: RAG_prompt}) </span>
<span id="cb36-8"><a href="#cb36-8" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb36-9"><a href="#cb36-9" aria-hidden="true" tabindex="-1"></a>create_word_doc(</span>
<span id="cb36-10"><a href="#cb36-10" aria-hidden="true" tabindex="-1"></a> response_ensemble_recursivecharactertext_bge[<span class="st">"answer"</span>], </span>
<span id="cb36-11"><a href="#cb36-11" aria-hidden="true" tabindex="-1"></a> <span class="st">"generated/Evaluation_Brief_response_ensemble_recursivecharactertext_bge.docx"</span></span>
<span id="cb36-12"><a href="#cb36-12" aria-hidden="true" tabindex="-1"></a>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</div>
<p>Et voilà! We now have 4 alternative briefs:</p>
<ul>
<li><p><a href="https://github.com/Edouard-Legoupil/rag_extraction/raw/main/generated/Evaluation_Brief_response_recursivecharactertext_bert.docx" target="_blank">#1 - Similarity retrieval with Bert embedding using Mixtral LLM</a></p></li>
<li><p><a href="https://github.com/Edouard-Legoupil/rag_extraction/raw/main/generated/Evaluation_Brief_response_mmr_recursivecharactertext_bge.docx" target="_blank">#2 - Maximum Marginal Relevance retrieval with BGE embedding using commandR LLM</a>,</p></li>
<li><p><a href="https://github.com/Edouard-Legoupil/rag_extraction/raw/main/generated/Evaluation_Brief_response_parent_recursivecharactertext_bge.docx" target="_blank">#3 - Parent document retrieval with BGE embedding using commandR LLM</a>,</p></li>
<li><p><a href="https://github.com/Edouard-Legoupil/rag_extraction/raw/main/generated/Evaluation_Brief_response_ensemble_recursivecharactertext_bge.docx" target="_blank">#4 - Ensemble document retrieval with BGE embedding using commandR LLM</a>,</p></li>
</ul>
<p>Each summary is slightly different… which is OK, as the same would happen if a human were writing them. Still, it is likely that one report is better than the others.</p>
<p>Now let’s <strong>evaluate</strong> the quality of those summarization pipelines to find out objectively which one is better!</p>
</section>
<section id="generating-evaluation-dataset" class="level3">
<h3 class="anchored" data-anchor-id="generating-evaluation-dataset">Generating Evaluation Dataset</h3>
<p>To do the evaluation, we first need to build a large-enough evaluation dataset so that the evaluation is based on multiple outputs. We need to build the following data:</p>
<ul>
<li><p><strong>question</strong>: list[str] - These are the questions the RAG pipeline will be evaluated on.</p></li>
<li><p><strong>contexts</strong>: list[list[str]] - The contexts which were retrieved and passed into the LLM corresponding to each question. This is a list[list] since each question can retrieve multiple text chunks.</p></li>
<li><p><strong>answer</strong>: list[str] - The answer that got generated from the RAG pipeline.</p></li>
</ul>
<p>One approach is to extract from the report both:</p>
<ul>
<li><p>all <strong>findings and evidence</strong>, i.e. what can be learnt from the specific context of this evaluation study, what are the root causes for the finding in this context and what are the main risks and difficulties in this context.</p></li>
<li><p>all <strong>recommendations</strong>, flagging clearly whether the recommendations relate to practices that should be discontinued or, on the other hand, to practices that should be scaled up, and whether they come with resource allocation requirements.</p></li>
</ul>
<p>To provide more perspectives for the extraction, the report can be reviewed by 25 different types of experts who may look at UNHCR programmes from different angles:</p>
<ul>
<li><p>4 experts for <strong>Strategic Impact</strong>: i.e., findings or recommendations that require a change in existing policies and regulations in relation to the specific impact area:</p>
<ol type="1">
<li>Attaining favorable protection environments</li>
<li>Realizing rights in safe environments</li>
<li>Empowering communities and achieving gender equality</li>
<li>Securing durable solutions</li>
</ol></li>
<li><p>16 experts for <strong>Operational Outcome</strong>: i.e., findings or recommendations that require a change that needs to be implemented in the field as an adaptation or change of current activities in relation to the specific outcome area:</p>
<ol type="1">
<li>Access to territory registration and documentation</li>
<li>Status determination</li>
<li>Protection policy and law</li>
<li>Gender-based violence</li>
<li>Child protection</li>
<li>Safety and access to justice</li>
<li>Community engagement and women’s empowerment</li>
<li>Well-being and basic needs</li>
<li>Sustainable housing and settlements</li>
<li>Healthy lives</li>
<li>Education</li>
<li>Clean water sanitation and hygiene</li>
<li>Self-reliance, Economic inclusion, and livelihoods</li>
<li>Voluntary repatriation and sustainable reintegration</li>
<li>Resettlement and complementary pathways</li>
<li>Local integration and other local solutions</li>
</ol></li>
<li><p>5 experts for <strong>Organizational Enabler</strong>: i.e., findings or recommendations that require changes in management practices, technical approach, business processes, staffing allocation or capacity building in relation to:</p>
<ol type="1">
<li>Systems and processes</li>
<li>Operational support and supply chain</li>
<li>People and culture</li>
<li>External engagement and resource mobilization</li>
<li>Leadership and governance</li>
</ol></li>
</ul>
<p>First let’s set up the prompt questions</p>
<div class="cell" data-execution_count="37">
<div class="sourceCode cell-code" id="cb37"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb37-1"><a href="#cb37-1" aria-hidden="true" tabindex="-1"></a><span class="co"># Define the list of experts on impact - outcome - organisation</span></span>
<span id="cb37-2"><a href="#cb37-2" aria-hidden="true" tabindex="-1"></a>q_experts <span class="op">=</span> [</span>
<span id="cb37-3"><a href="#cb37-3" aria-hidden="true" tabindex="-1"></a> <span class="st">"<s> [INST] Instructions: Act as a public program evaluation expert working for UNHCR. Your specific area of expertise and focus is strictly on the Strategic Impact: ---Attaining favorable protection environments---: i.e., finding or recommendations that require a change in existing policy and regulations. [/INST]"</span>,</span>
<span id="cb37-4"><a href="#cb37-4" aria-hidden="true" tabindex="-1"></a> <span class="st">"<s> [INST] Instructions: Act as a public program evaluation expert working for UNHCR. Your specific area of expertise and focus is strictly on the Strategic Impact: ---Realizing rights in safe environments---: i.e., finding or recommendations that require a change in existing policy and regulations. [/INST]"</span>,</span>
<span id="cb37-5"><a href="#cb37-5" aria-hidden="true" tabindex="-1"></a> <span class="st">"<s> [INST] Instructions: Act as a public program evaluation expert working for UNHCR. Your specific area of expertise and focus is strictly on the Strategic Impact: ---Empowering communities and achieving gender equality--- : i.e., finding or recommendations that require a change in existing policy and regulations. [/INST]"</span>,</span>
<span id="cb37-6"><a href="#cb37-6" aria-hidden="true" tabindex="-1"></a> <span class="st">"<s> [INST] Instructions: Act as a public program evaluation expert working for UNHCR. Your specific area of expertise and focus is strictly on the Strategic Impact: ---Securing durable solutions--- : i.e., finding or recommendations that require a change in existing policy and regulations. [/INST]"</span>,</span>
<span id="cb37-7"><a href="#cb37-7" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb37-8"><a href="#cb37-8" aria-hidden="true" tabindex="-1"></a> <span class="st">"<s> [INST] Instructions: Act as a public program evaluation expert working for UNHCR. Your specific area of expertise and focus is strictly on the specific Operational Outcome: ---Access to territory registration and documentation ---, i.e. finding or recommendations that require a change that needs to be implemented in the field as an adaptation or change of current activities. [/INST]"</span>,</span>
<span id="cb37-9"><a href="#cb37-9" aria-hidden="true" tabindex="-1"></a> <span class="st">"<s> [INST] Instructions: Act as a public program evaluation expert working for UNHCR. Your specific area of expertise and focus is strictly on the specific Operational Outcome: --- Status determination ---, i.e. finding or recommendations that require a change that needs to be implemented in the field as an adaptation or change of current activities. [/INST]"</span>,</span>
<span id="cb37-10"><a href="#cb37-10" aria-hidden="true" tabindex="-1"></a> <span class="st">"<s> [INST] Instructions: Act as a public program evaluation expert working for UNHCR. Your specific area of expertise and focus is strictly on the specific Operational Outcome: --- Protection policy and law---, i.e. finding or recommendations that require a change that needs to be implemented in the field as an adaptation or change of current activities. [/INST]"</span>,</span>
<span id="cb37-11"><a href="#cb37-11" aria-hidden="true" tabindex="-1"></a> <span class="st">"<s> [INST] Instructions: Act as a public program evaluation expert working for UNHCR. Your specific area of expertise and focus is strictly on the specific Operational Outcome: --- Gender-based violence ---, i.e. finding or recommendations that require a change that needs to be implemented in the field as an adaptation or change of current activities. [/INST]"</span>,</span>
<span id="cb37-12"><a href="#cb37-12" aria-hidden="true" tabindex="-1"></a> <span class="st">"<s> [INST] Instructions: Act as a public program evaluation expert working for UNHCR. Your specific area of expertise and focus is strictly on the specific Operational Outcome: --- Child protection ---, i.e. finding or recommendations that require a change that needs to be implemented in the field as an adaptation or change of current activities. [/INST]"</span>,</span>
<span id="cb37-13"><a href="#cb37-13" aria-hidden="true" tabindex="-1"></a> <span class="st">"<s> [INST] Instructions: Act as a public program evaluation expert working for UNHCR. Your specific area of expertise and focus is strictly on the specific Operational Outcome: --- Safety and access to justice ---, i.e. finding or recommendations that require a change that needs to be implemented in the field as an adaptation or change of current activities. [/INST]"</span>,</span>
<span id="cb37-14"><a href="#cb37-14" aria-hidden="true" tabindex="-1"></a> <span class="st">"<s> [INST] Instructions: Act as a public program evaluation expert working for UNHCR. Your specific area of expertise and focus is strictly on the specific Operational Outcome: --- Community engagement and women's empowerment ---, i.e. finding or recommendations that require a change that needs to be implemented in the field as an adaptation or change of current activities. [/INST]"</span>,</span>
<span id="cb37-15"><a href="#cb37-15" aria-hidden="true" tabindex="-1"></a> <span class="st">"<s> [INST] Instructions: Act as a public program evaluation expert working for UNHCR. Your specific area of expertise and focus is strictly on the specific Operational Outcome: --- Well-being and basic needs ---, i.e. finding or recommendations that require a change that needs to be implemented in the field as an adaptation or change of current activities. [/INST]"</span>,</span>
<span id="cb37-16"><a href="#cb37-16" aria-hidden="true" tabindex="-1"></a> <span class="st">"<s> [INST] Instructions: Act as a public program evaluation expert working for UNHCR. Your specific area of expertise and focus is strictly on the specific Operational Outcome: --- Sustainable housing and settlements ---, i.e. finding or recommendations that require a change that needs to be implemented in the field as an adaptation or change of current activities. [/INST]"</span>,</span>
<span id="cb37-17"><a href="#cb37-17" aria-hidden="true" tabindex="-1"></a> <span class="st">"<s> [INST] Instructions: Act as a public program evaluation expert working for UNHCR. Your specific area of expertise and focus is strictly on the specific Operational Outcome: --- Healthy lives---, i.e. finding or recommendations that require a change that needs to be implemented in the field as an adaptation or change of current activities. [/INST]"</span>,</span>
<span id="cb37-18"><a href="#cb37-18" aria-hidden="true" tabindex="-1"></a> <span class="st">"<s> [INST] Instructions: Act as a public program evaluation expert working for UNHCR. Your specific area of expertise and focus is strictly on the specific Operational Outcome: --- Education ---, i.e. finding or recommendations that require a change that needs to be implemented in the field as an adaptation or change of current activities. [/INST]"</span>,</span>
<span id="cb37-19"><a href="#cb37-19" aria-hidden="true" tabindex="-1"></a> <span class="st">"<s> [INST] Instructions: Act as a public program evaluation expert working for UNHCR. Your specific area of expertise and focus is strictly on the specific Operational Outcome: --- Clean water sanitation and hygiene ---, i.e. finding or recommendations that require a change that needs to be implemented in the field as an adaptation or change of current activities. [/INST]"</span>,</span>
<span id="cb37-20"><a href="#cb37-20" aria-hidden="true" tabindex="-1"></a> <span class="st">"<s> [INST] Instructions: Act as a public program evaluation expert working for UNHCR. Your specific area of expertise and focus is strictly on the specific Operational Outcome: --- Self-reliance, Economic inclusion, and livelihoods ---, i.e. finding or recommendations that require a change that needs to be implemented in the field as an adaptation or change of current activities. [/INST]"</span>,</span>
<span id="cb37-21"><a href="#cb37-21" aria-hidden="true" tabindex="-1"></a> <span class="st">"<s> [INST] Instructions: Act as a public program evaluation expert working for UNHCR. Your specific area of expertise and focus is strictly on the specific Operational Outcome: --- Voluntary repatriation and sustainable reintegration ---, i.e. finding or recommendations that require a change that needs to be implemented in the field as an adaptation or change of current activities. [/INST]"</span>,</span>
<span id="cb37-22"><a href="#cb37-22" aria-hidden="true" tabindex="-1"></a> <span class="st">"<s> [INST] Instructions: Act as a public program evaluation expert working for UNHCR. Your specific area of expertise and focus is strictly on the specific Operational Outcome: --- Resettlement and complementary pathways---, i.e. finding or recommendations that require a change that needs to be implemented in the field as an adaptation or change of current activities. [/INST]"</span>,</span>
<span id="cb37-23"><a href="#cb37-23" aria-hidden="true" tabindex="-1"></a> <span class="st">"<s> [INST] Instructions: Act as a public program evaluation expert working for UNHCR. Your specific area of expertise and focus is strictly on the specific Operational Outcome: --- Local integration and other local solutions ---, i.e. finding or recommendations that require a change that needs to be implemented in the field as an adaptation or change of current activities. [/INST]"</span>, </span>
<span id="cb37-24"><a href="#cb37-24" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb37-25"><a href="#cb37-25" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb37-26"><a href="#cb37-26" aria-hidden="true" tabindex="-1"></a> <span class="st">"<s> [INST] Instructions: Act as a public program evaluation expert working for UNHCR. Your specific area of expertise and focus is strictly on Organizational Enablers related to Systems and processes, i.e. elements that require potential changes in either management practices, technical approach, business processes, staffing allocation or capacity building. [/INST]"</span>,</span>
<span id="cb37-27"><a href="#cb37-27" aria-hidden="true" tabindex="-1"></a> <span class="st">"<s> [INST] Instructions: Act as a public program evaluation expert working for UNHCR. Your specific area of expertise and focus is strictly on Organizational Enablers related to Operational support and supply chain, i.e. elements that require potential changes in either management practices, technical approach, business processes, staffing allocation or capacity building. [/INST]"</span> ,</span>
<span id="cb37-28"><a href="#cb37-28" aria-hidden="true" tabindex="-1"></a> <span class="st">"<s> [INST] Instructions: Act as a public program evaluation expert working for UNHCR. Your specific area of expertise and focus is strictly on Organizational Enablers related to People and culture, i.e. elements that require potential changes in either management practices, technical approach, business processes, staffing allocation or capacity building. [/INST]"</span> ,</span>
<span id="cb37-29"><a href="#cb37-29" aria-hidden="true" tabindex="-1"></a> <span class="st">"<s> [INST] Instructions: Act as a public program evaluation expert working for UNHCR. Your specific area of expertise and focus is strictly on Organizational Enablers related to External engagement and resource mobilization, i.e. elements that require potential changes in either management practices, technical approach, business processes, staffing allocation or capacity building. [/INST]"</span> ,</span>
<span id="cb37-30"><a href="#cb37-30" aria-hidden="true" tabindex="-1"></a> <span class="st">"<s> [INST] Instructions: Act as a public program evaluation expert working for UNHCR. Your specific area of expertise and focus is strictly on Organizational Enablers related to Leadership and governance, i.e. elements that require potential changes in either management practices, technical approach, business processes, staffing allocation or capacity building. [/INST]"</span> </span>
<span id="cb37-31"><a href="#cb37-31" aria-hidden="true" tabindex="-1"></a>]</span>
<span id="cb37-32"><a href="#cb37-32" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb37-33"><a href="#cb37-33" aria-hidden="true" tabindex="-1"></a><span class="co"># Predefined knowledge extraction questions</span></span>
<span id="cb37-34"><a href="#cb37-34" aria-hidden="true" tabindex="-1"></a>q_questions <span class="op">=</span> [</span>
<span id="cb37-35"><a href="#cb37-35" aria-hidden="true" tabindex="-1"></a> <span class="st">" List, as bullet points, all findings and evidences in relation to your specific area of expertise and focus. "</span>,</span>
<span id="cb37-36"><a href="#cb37-36" aria-hidden="true" tabindex="-1"></a> <span class="st">" Explain, in relation to your specific area of expertise and focus, what are the root causes for the situation. "</span> ,</span>
<span id="cb37-37"><a href="#cb37-37" aria-hidden="true" tabindex="-1"></a> <span class="st">" Explain, in relation to your specific area of expertise and focus, what are the main risks and difficulties here described. "</span>,</span>
<span id="cb37-38"><a href="#cb37-38" aria-hidden="true" tabindex="-1"></a> <span class="st">" Explain, in relation to your specific area of expertise and focus, what what can be learnt. "</span>,</span>
<span id="cb37-39"><a href="#cb37-39" aria-hidden="true" tabindex="-1"></a> <span class="st">" List, as bullet points, all recommendations made in relation to your specific area of expertise and focus. "</span><span class="co">#,</span></span>
<span id="cb37-40"><a href="#cb37-40" aria-hidden="true" tabindex="-1"></a> <span class="co"># "Indicate if mentionnend what resource will be required to implement the recommendations made in relation to your specific area of expertise and focus. ",</span></span>
<span id="cb37-41"><a href="#cb37-41" aria-hidden="true" tabindex="-1"></a> <span class="co"># "List, as bullet points, all recommendations made in relation to your specific area of expertise and focus that relates to topics or activities recommended to be discontinued. ",</span></span>
<span id="cb37-42"><a href="#cb37-42" aria-hidden="true" tabindex="-1"></a> <span class="co"># "List, as bullet points, all recommendations made in relation to your specific area of expertise and focus that relates to topics or activities recommended to be scaled up. " </span></span>
<span id="cb37-43"><a href="#cb37-43" aria-hidden="true" tabindex="-1"></a> <span class="co"># Add more questions here...</span></span>
<span id="cb37-44"><a href="#cb37-44" aria-hidden="true" tabindex="-1"></a>]</span>
<span id="cb37-45"><a href="#cb37-45" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb37-46"><a href="#cb37-46" aria-hidden="true" tabindex="-1"></a><span class="co">## Additional instructions!</span></span>
<span id="cb37-47"><a href="#cb37-47" aria-hidden="true" tabindex="-1"></a>q_instr <span class="op">=</span> <span class="st">"""</span></span>
<span id="cb37-48"><a href="#cb37-48" aria-hidden="true" tabindex="-1"></a><span class="st"></s></span></span>
<span id="cb37-49"><a href="#cb37-49" aria-hidden="true" tabindex="-1"></a><span class="st">[INST] </span></span>
<span id="cb37-50"><a href="#cb37-50" aria-hidden="true" tabindex="-1"></a><span class="st">Keep your answer grounded in the facts of the contexts. </span></span>
<span id="cb37-51"><a href="#cb37-51" aria-hidden="true" tabindex="-1"></a><span class="st">If the contexts do not contain the facts to answer the QUESTION, return </span><span class="sc">{NONE}</span><span class="st"> </span></span>
<span id="cb37-52"><a href="#cb37-52" aria-hidden="true" tabindex="-1"></a><span class="st">Be concise in the response and when relevant include precise citations from the contexts. </span></span>
<span id="cb37-53"><a href="#cb37-53" aria-hidden="true" tabindex="-1"></a><span class="st">[/INST] </span></span>
<span id="cb37-54"><a href="#cb37-54" aria-hidden="true" tabindex="-1"></a><span class="st">"""</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</div>
<p>Then, we can reset the 2 RAG pipelines with their respective LLMs</p>
<div class="cell" data-execution_count="38">
<div class="sourceCode cell-code" id="cb38"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb38-1"><a href="#cb38-1" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> langchain_community.chat_models <span class="im">import</span> ChatOllama</span>
<span id="cb38-2"><a href="#cb38-2" aria-hidden="true" tabindex="-1"></a>ollama_mixtral <span class="op">=</span> ChatOllama(</span>
<span id="cb38-3"><a href="#cb38-3" aria-hidden="true" tabindex="-1"></a> model<span class="op">=</span><span class="st">"mixtral:8x7b-instruct-v0.1-q4_K_M"</span>, </span>
<span id="cb38-4"><a href="#cb38-4" aria-hidden="true" tabindex="-1"></a> temperature<span class="op">=</span><span class="fl">0.2</span>, </span>
<span id="cb38-5"><a href="#cb38-5" aria-hidden="true" tabindex="-1"></a> request_timeout<span class="op">=</span><span class="dv">500</span></span>
<span id="cb38-6"><a href="#cb38-6" aria-hidden="true" tabindex="-1"></a>)</span>
<span id="cb38-7"><a href="#cb38-7" aria-hidden="true" tabindex="-1"></a>ollama_commandR <span class="op">=</span> ChatOllama(</span>
<span id="cb38-8"><a href="#cb38-8" aria-hidden="true" tabindex="-1"></a> model<span class="op">=</span><span class="st">"command-r:35b-v0.1-q4_K_M"</span>, </span>
<span id="cb38-9"><a href="#cb38-9" aria-hidden="true" tabindex="-1"></a> temperature<span class="op">=</span><span class="fl">0.2</span>, </span>
<span id="cb38-10"><a href="#cb38-10" aria-hidden="true" tabindex="-1"></a> request_timeout<span class="op">=</span><span class="dv">500</span></span>
<span id="cb38-11"><a href="#cb38-11" aria-hidden="true" tabindex="-1"></a>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</div>
<p>Then the 2 embedding models</p>
<div class="cell" data-execution_count="39">
<div class="sourceCode cell-code" id="cb39"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb39-1"><a href="#cb39-1" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> langchain_community.embeddings <span class="im">import</span> GPT4AllEmbeddings </span>
<span id="cb39-2"><a href="#cb39-2" aria-hidden="true" tabindex="-1"></a>embeddings_bert <span class="op">=</span> GPT4AllEmbeddings(</span>
<span id="cb39-3"><a href="#cb39-3" aria-hidden="true" tabindex="-1"></a> model_name <span class="op">=</span> <span class="st">"all-MiniLM-L6-v2.gguf2.f16.gguf"</span></span>
<span id="cb39-4"><a href="#cb39-4" aria-hidden="true" tabindex="-1"></a>)</span>
<span id="cb39-5"><a href="#cb39-5" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb39-6"><a href="#cb39-6" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> langchain_community.embeddings <span class="im">import</span> HuggingFaceBgeEmbeddings</span>
<span id="cb39-7"><a href="#cb39-7" aria-hidden="true" tabindex="-1"></a>embeddings_bge<span class="op">=</span> HuggingFaceBgeEmbeddings(</span>
<span id="cb39-8"><a href="#cb39-8" aria-hidden="true" tabindex="-1"></a> model_name<span class="op">=</span><span class="st">"BAAI/bge-small-en"</span>,</span>
<span id="cb39-9"><a href="#cb39-9" aria-hidden="true" tabindex="-1"></a> model_kwargs<span class="op">=</span>{<span class="st">"device"</span>: <span class="st">"cpu"</span>}, </span>
<span id="cb39-10"><a href="#cb39-10" aria-hidden="true" tabindex="-1"></a> encode_kwargs<span class="op">=</span>{<span class="st">"normalize_embeddings"</span>: <span class="va">True</span>}</span>
<span id="cb39-11"><a href="#cb39-11" aria-hidden="true" tabindex="-1"></a>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</div>
<p>Now we reload the 2 previous vector stores</p>
<div class="cell" data-execution_count="40">
<div class="sourceCode cell-code" id="cb40"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb40-1"><a href="#cb40-1" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> langchain_community.vectorstores <span class="im">import</span> Chroma</span>
<span id="cb40-2"><a href="#cb40-2" aria-hidden="true" tabindex="-1"></a><span class="im">import</span> chromadb</span>
<span id="cb40-3"><a href="#cb40-3" aria-hidden="true" tabindex="-1"></a>client <span class="op">=</span> chromadb.PersistentClient(path<span class="op">=</span><span class="st">"persist/"</span>)</span>
<span id="cb40-4"><a href="#cb40-4" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb40-5"><a href="#cb40-5" aria-hidden="true" tabindex="-1"></a>vectorstore_recursivecharactertext_bert <span class="op">=</span> Chroma(</span>
<span id="cb40-6"><a href="#cb40-6" aria-hidden="true" tabindex="-1"></a> collection_name<span class="op">=</span><span class="st">"recursivecharactertext_bert"</span>,</span>
<span id="cb40-7"><a href="#cb40-7" aria-hidden="true" tabindex="-1"></a> persist_directory<span class="op">=</span><span class="st">"persist/"</span>, </span>
<span id="cb40-8"><a href="#cb40-8" aria-hidden="true" tabindex="-1"></a> embedding_function<span class="op">=</span>embeddings_bert</span>
<span id="cb40-9"><a href="#cb40-9" aria-hidden="true" tabindex="-1"></a>)</span>
<span id="cb40-10"><a href="#cb40-10" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb40-11"><a href="#cb40-11" aria-hidden="true" tabindex="-1"></a>vectorstore_recursivecharactertext_bge <span class="op">=</span> Chroma(</span>
<span id="cb40-12"><a href="#cb40-12" aria-hidden="true" tabindex="-1"></a> collection_name<span class="op">=</span><span class="st">"recursivecharactertext_bge"</span>,</span>
<span id="cb40-13"><a href="#cb40-13" aria-hidden="true" tabindex="-1"></a> persist_directory<span class="op">=</span><span class="st">"persist/"</span>, </span>
<span id="cb40-14"><a href="#cb40-14" aria-hidden="true" tabindex="-1"></a> embedding_function<span class="op">=</span>embeddings_bge</span>
<span id="cb40-15"><a href="#cb40-15" aria-hidden="true" tabindex="-1"></a>) </span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</div>
<p>and related retrievers</p>
<div class="cell" data-execution_count="41">
<div class="sourceCode cell-code" id="cb41"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb41-1"><a href="#cb41-1" aria-hidden="true" tabindex="-1"></a>ragRetriever_recursivecharactertext_bert <span class="op">=</span> vectorstore_recursivecharactertext_bert.as_retriever()</span>
<span id="cb41-2"><a href="#cb41-2" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb41-3"><a href="#cb41-3" aria-hidden="true" tabindex="-1"></a>ragRetriever_mmr_recursivecharactertext_bge <span class="op">=</span> vectorstore_recursivecharactertext_bge.as_retriever(</span>
<span id="cb41-4"><a href="#cb41-4" aria-hidden="true" tabindex="-1"></a> search_type<span class="op">=</span><span class="st">"mmr"</span></span>
<span id="cb41-5"><a href="#cb41-5" aria-hidden="true" tabindex="-1"></a>) </span>
<span id="cb41-6"><a href="#cb41-6" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb41-7"><a href="#cb41-7" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> langchain.retrievers <span class="im">import</span> ParentDocumentRetriever</span>
<span id="cb41-8"><a href="#cb41-8" aria-hidden="true" tabindex="-1"></a>parent_splitter <span class="op">=</span> RecursiveCharacterTextSplitter(chunk_size<span class="op">=</span><span class="dv">1536</span>)</span>
<span id="cb41-9"><a href="#cb41-9" aria-hidden="true" tabindex="-1"></a>child_splitter <span class="op">=</span> RecursiveCharacterTextSplitter(chunk_size<span class="op">=</span><span class="dv">256</span>)</span>
<span id="cb41-10"><a href="#cb41-10" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb41-11"><a href="#cb41-11" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> langchain.storage <span class="im">import</span> InMemoryStore</span>
<span id="cb41-12"><a href="#cb41-12" aria-hidden="true" tabindex="-1"></a>store <span class="op">=</span> InMemoryStore()</span>
<span id="cb41-13"><a href="#cb41-13" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb41-14"><a href="#cb41-14" aria-hidden="true" tabindex="-1"></a>ragRetriever_parent_recursivecharactertext_bge <span class="op">=</span> ParentDocumentRetriever(</span>
<span id="cb41-15"><a href="#cb41-15" aria-hidden="true" tabindex="-1"></a> vectorstore<span class="op">=</span> vectorstore_recursivecharactertext_bge,</span>
<span id="cb41-16"><a href="#cb41-16" aria-hidden="true" tabindex="-1"></a> docstore<span class="op">=</span>store,</span>
<span id="cb41-17"><a href="#cb41-17" aria-hidden="true" tabindex="-1"></a> child_splitter<span class="op">=</span>child_splitter,</span>
<span id="cb41-18"><a href="#cb41-18" aria-hidden="true" tabindex="-1"></a> parent_splitter<span class="op">=</span>parent_splitter,</span>
<span id="cb41-19"><a href="#cb41-19" aria-hidden="true" tabindex="-1"></a>)</span>
<span id="cb41-20"><a href="#cb41-20" aria-hidden="true" tabindex="-1"></a>ragRetriever_parent_recursivecharactertext_bge.add_documents(docs)</span>
<span id="cb41-21"><a href="#cb41-21" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb41-22"><a href="#cb41-22" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> langchain.retrievers <span class="im">import</span> BM25Retriever</span>
<span id="cb41-23"><a href="#cb41-23" aria-hidden="true" tabindex="-1"></a>retriever_bm25 <span class="op">=</span> BM25Retriever.from_documents(chunks_recursivecharactertext)</span>
<span id="cb41-24"><a href="#cb41-24" aria-hidden="true" tabindex="-1"></a>retriever_bm25.k <span class="op">=</span> <span class="dv">3</span></span>
<span id="cb41-25"><a href="#cb41-25" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb41-26"><a href="#cb41-26" aria-hidden="true" tabindex="-1"></a>retriever_similarity <span class="op">=</span> vectorstore_recursivecharactertext_bge.as_retriever(search_kwargs<span class="op">=</span>{<span class="st">"k"</span>: <span class="dv">3</span>})</span>
<span id="cb41-27"><a href="#cb41-27" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb41-28"><a href="#cb41-28" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> langchain.retrievers <span class="im">import</span> EnsembleRetriever</span>
<span id="cb41-29"><a href="#cb41-29" aria-hidden="true" tabindex="-1"></a>ragRetriever_ensemble_recursivecharactertext_bge <span class="op">=</span> EnsembleRetriever(</span>
<span id="cb41-30"><a href="#cb41-30" aria-hidden="true" tabindex="-1"></a> retrievers<span class="op">=</span>[retriever_bm25, retriever_similarity], </span>
<span id="cb41-31"><a href="#cb41-31" aria-hidden="true" tabindex="-1"></a> <span class="co"># Relative weights of the retrievers need to sum to 1!</span></span>
<span id="cb41-32"><a href="#cb41-32" aria-hidden="true" tabindex="-1"></a> weights<span class="op">=</span>[<span class="fl">0.42</span>, <span class="fl">0.58</span>]</span>
<span id="cb41-33"><a href="#cb41-33" aria-hidden="true" tabindex="-1"></a> )</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</div>
<p>The main prompt template</p>
<div class="cell" data-execution_count="42">
<div class="sourceCode cell-code" id="cb42"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb42-1"><a href="#cb42-1" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> langchain_core.prompts <span class="im">import</span> ChatPromptTemplate</span>
<span id="cb42-2"><a href="#cb42-2" aria-hidden="true" tabindex="-1"></a>prompt_retrieval <span class="op">=</span> ChatPromptTemplate.from_template(</span>
<span id="cb42-3"><a href="#cb42-3" aria-hidden="true" tabindex="-1"></a><span class="st">"""Answer the following question based only on the provided context:</span></span>
<span id="cb42-4"><a href="#cb42-4" aria-hidden="true" tabindex="-1"></a><span class="st">&lt;context&gt;</span></span>
<span id="cb42-5"><a href="#cb42-5" aria-hidden="true" tabindex="-1"></a><span class="sc">{context}</span></span>
<span id="cb42-6"><a href="#cb42-6" aria-hidden="true" tabindex="-1"></a><span class="st">&lt;/context&gt;</span></span>
<span id="cb42-7"><a href="#cb42-7" aria-hidden="true" tabindex="-1"></a><span class="st">Question: </span><span class="sc">{input}</span><span class="st">"""</span></span>
<span id="cb42-8"><a href="#cb42-8" aria-hidden="true" tabindex="-1"></a>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</div>
<p>and lastly, the retrieval chain!</p>
<div class="cell" data-execution_count="43">
<div class="sourceCode cell-code" id="cb43"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb43-1"><a href="#cb43-1" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> langchain.chains.combine_documents <span class="im">import</span> create_stuff_documents_chain</span>
<span id="cb43-2"><a href="#cb43-2" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> langchain.chains <span class="im">import</span> create_retrieval_chain</span>
<span id="cb43-3"><a href="#cb43-3" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb43-4"><a href="#cb43-4" aria-hidden="true" tabindex="-1"></a>combine_docs_chain_mixtral <span class="op">=</span> create_stuff_documents_chain(</span>
<span id="cb43-5"><a href="#cb43-5" aria-hidden="true" tabindex="-1"></a> ollama_mixtral ,</span>
<span id="cb43-6"><a href="#cb43-6" aria-hidden="true" tabindex="-1"></a> prompt_retrieval</span>
<span id="cb43-7"><a href="#cb43-7" aria-hidden="true" tabindex="-1"></a>)</span>
<span id="cb43-8"><a href="#cb43-8" aria-hidden="true" tabindex="-1"></a>qa_chain_mixtral_recursivecharactertext_bert <span class="op">=</span> create_retrieval_chain(</span>
<span id="cb43-9"><a href="#cb43-9" aria-hidden="true" tabindex="-1"></a> ragRetriever_recursivecharactertext_bert, </span>
<span id="cb43-10"><a href="#cb43-10" aria-hidden="true" tabindex="-1"></a> combine_docs_chain_mixtral</span>
<span id="cb43-11"><a href="#cb43-11" aria-hidden="true" tabindex="-1"></a>)</span>
<span id="cb43-12"><a href="#cb43-12" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb43-13"><a href="#cb43-13" aria-hidden="true" tabindex="-1"></a>combine_docs_chain_command <span class="op">=</span> create_stuff_documents_chain(</span>
<span id="cb43-14"><a href="#cb43-14" aria-hidden="true" tabindex="-1"></a> ollama_commandR,</span>
<span id="cb43-15"><a href="#cb43-15" aria-hidden="true" tabindex="-1"></a> prompt_retrieval</span>
<span id="cb43-16"><a href="#cb43-16" aria-hidden="true" tabindex="-1"></a>)</span>
<span id="cb43-17"><a href="#cb43-17" aria-hidden="true" tabindex="-1"></a>qa_chain_command_mmr_recursivecharactertext_bge <span class="op">=</span> create_retrieval_chain(</span>
<span id="cb43-18"><a href="#cb43-18" aria-hidden="true" tabindex="-1"></a> ragRetriever_mmr_recursivecharactertext_bge, </span>
<span id="cb43-19"><a href="#cb43-19" aria-hidden="true" tabindex="-1"></a> combine_docs_chain_command</span>
<span id="cb43-20"><a href="#cb43-20" aria-hidden="true" tabindex="-1"></a>)</span>
<span id="cb43-21"><a href="#cb43-21" aria-hidden="true" tabindex="-1"></a>qa_chain_command_parent_recursivecharactertext_bge <span class="op">=</span> create_retrieval_chain(</span>
<span id="cb43-22"><a href="#cb43-22" aria-hidden="true" tabindex="-1"></a> ragRetriever_parent_recursivecharactertext_bge, </span>
<span id="cb43-23"><a href="#cb43-23" aria-hidden="true" tabindex="-1"></a> combine_docs_chain_command</span>
<span id="cb43-24"><a href="#cb43-24" aria-hidden="true" tabindex="-1"></a>)</span>
<span id="cb43-25"><a href="#cb43-25" aria-hidden="true" tabindex="-1"></a>qa_chain_command_ensemble_recursivecharactertext_bge <span class="op">=</span> create_retrieval_chain(</span>
<span id="cb43-26"><a href="#cb43-26" aria-hidden="true" tabindex="-1"></a> ragRetriever_ensemble_recursivecharactertext_bge, </span>
<span id="cb43-27"><a href="#cb43-27" aria-hidden="true" tabindex="-1"></a> combine_docs_chain_command</span>
<span id="cb43-28"><a href="#cb43-28" aria-hidden="true" tabindex="-1"></a>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</div>
<p>and now build the two evaluation datasets by iterating over expert profiles and questions!</p>
<p>The <a href="https://github.com/Edouard-Legoupil/rag_extraction/raw/main/generated/dataset_mixtral_recursivecharactertext_bert.xlsx" target="_blank">first dataset</a></p>
<div class="cell" data-execution_count="44">
<div class="sourceCode cell-code" id="cb44"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb44-1"><a href="#cb44-1" aria-hidden="true" tabindex="-1"></a><span class="co"># Create dataset (empty list for now)</span></span>
<span id="cb44-2"><a href="#cb44-2" aria-hidden="true" tabindex="-1"></a>dataset_mixtral_recursivecharactertext_bert <span class="op">=</span> []</span>
<span id="cb44-3"><a href="#cb44-3" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb44-4"><a href="#cb44-4" aria-hidden="true" tabindex="-1"></a><span class="co"># Iterate over every combination of expert profile and question</span></span>
<span id="cb44-5"><a href="#cb44-5" aria-hidden="true" tabindex="-1"></a><span class="cf">for</span> expert <span class="kw">in</span> q_experts:</span>
<span id="cb44-6"><a href="#cb44-6" aria-hidden="true" tabindex="-1"></a> <span class="cf">for</span> question <span class="kw">in</span> q_questions:</span>
<span id="cb44-7"><a href="#cb44-7" aria-hidden="true" tabindex="-1"></a> <span class="co"># Generate response </span></span>
<span id="cb44-8"><a href="#cb44-8" aria-hidden="true" tabindex="-1"></a> response <span class="op">=</span> qa_chain_mixtral_recursivecharactertext_bert.invoke({<span class="st">"input"</span>: expert <span class="op">+</span> question <span class="op">+</span> q_instr})</span>
<span id="cb44-9"><a href="#cb44-9" aria-hidden="true" tabindex="-1"></a> <span class="co"># Add context-question-response to dataset</span></span>
<span id="cb44-10"><a href="#cb44-10" aria-hidden="true" tabindex="-1"></a> dataset_mixtral_recursivecharactertext_bert.append({</span>
<span id="cb44-11"><a href="#cb44-11" aria-hidden="true" tabindex="-1"></a> <span class="st">"question"</span>: expert <span class="op">+</span> question <span class="op">+</span> q_instr,</span>
<span id="cb44-12"><a href="#cb44-12" aria-hidden="true" tabindex="-1"></a> <span class="st">"contexts"</span>: [context.page_content <span class="cf">for</span> context <span class="kw">in</span> response[<span class="st">"context"</span>]],</span>
<span id="cb44-13"><a href="#cb44-13" aria-hidden="true" tabindex="-1"></a> <span class="st">"answer"</span>: response[<span class="st">"answer"</span>]</span>
<span id="cb44-14"><a href="#cb44-14" aria-hidden="true" tabindex="-1"></a> })</span>
<span id="cb44-15"><a href="#cb44-15" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb44-16"><a href="#cb44-16" aria-hidden="true" tabindex="-1"></a><span class="co">#Save this to the disk! </span></span>
<span id="cb44-17"><a href="#cb44-17" aria-hidden="true" tabindex="-1"></a><span class="im">import</span> pandas <span class="im">as</span> pd</span>
<span id="cb44-18"><a href="#cb44-18" aria-hidden="true" tabindex="-1"></a>dataset_mixtral_recursivecharactertext_bert_d <span class="op">=</span> pd.DataFrame(dataset_mixtral_recursivecharactertext_bert)</span>
<span id="cb44-19"><a href="#cb44-19" aria-hidden="true" tabindex="-1"></a>dataset_mixtral_recursivecharactertext_bert_d.to_excel(<span class="st">"dataset/dataset_mixtral_recursivecharactertext_bert.xlsx"</span>) </span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</div>
<p>Then produce the <a href="https://github.com/Edouard-Legoupil/rag_extraction/raw/main/generated/dataset_command_mmr_recursivecharactertext_bge.xlsx" target="_blank">second dataset</a></p>
<div class="cell" data-execution_count="45">
<div class="sourceCode cell-code" id="cb45"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb45-1"><a href="#cb45-1" aria-hidden="true" tabindex="-1"></a><span class="co"># Create dataset (empty list for now)</span></span>
<span id="cb45-2"><a href="#cb45-2" aria-hidden="true" tabindex="-1"></a>dataset_command_mmr_recursivecharactertext_bge <span class="op">=</span> []</span>
<span id="cb45-3"><a href="#cb45-3" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb45-4"><a href="#cb45-4" aria-hidden="true" tabindex="-1"></a><span class="co"># Iterate over every combination of expert profile and question</span></span>
<span id="cb45-5"><a href="#cb45-5" aria-hidden="true" tabindex="-1"></a><span class="cf">for</span> expert <span class="kw">in</span> q_experts:</span>
<span id="cb45-6"><a href="#cb45-6" aria-hidden="true" tabindex="-1"></a> <span class="cf">for</span> question <span class="kw">in</span> q_questions:</span>
<span id="cb45-7"><a href="#cb45-7" aria-hidden="true" tabindex="-1"></a> <span class="co"># Generate response with Ollama</span></span>
<span id="cb45-8"><a href="#cb45-8" aria-hidden="true" tabindex="-1"></a> response <span class="op">=</span> qa_chain_command_mmr_recursivecharactertext_bge.invoke({<span class="st">"input"</span>: expert <span class="op">+</span> question <span class="op">+</span> q_instr})</span>