7
7
from cognee .eval_framework .evaluation .metrics .context_coverage import ContextCoverageMetric
8
8
from typing import Any , Dict , List
9
9
from deepeval .metrics import ContextualRelevancyMetric
10
+ import time
11
+ from cognee .shared .logging_utils import get_logger
12
+
13
+ logger = get_logger ()
10
14
11
15
12
16
class DeepEvalAdapter (BaseEvalAdapter ):
13
17
def __init__ (self ):
18
+ self .n_retries = 5
14
19
self .g_eval_metrics = {
15
20
"correctness" : self .g_eval_correctness (),
16
21
"EM" : ExactMatchMetric (),
@@ -19,6 +24,33 @@ def __init__(self):
19
24
"context_coverage" : ContextCoverageMetric (),
20
25
}
21
26
27
def _calculate_metric(self, metric: str, test_case: LLMTestCase) -> Dict[str, Any]:
    """Calculate a single metric for a test case with retry logic.

    Args:
        metric: Key into ``self.g_eval_metrics`` selecting the metric to run.
        test_case: Test case handed unchanged to the metric's ``measure``.

    Returns:
        A dict holding the metric's ``score`` and ``reason`` after the first
        successful ``measure`` call, or ``{"score": None, "reason": None}``
        when every attempt raised.

    Raises:
        KeyError: If ``metric`` is not a key of ``self.g_eval_metrics``. The
            lookup happens before the retry loop, so it is never retried.
    """
    metric_to_calculate = self.g_eval_metrics[metric]

    # Always attempt the measurement at least once. With n_retries <= 0 a
    # plain ``range(self.n_retries)`` would be empty and the method would
    # report None values without ever calling ``measure`` or logging anything.
    total_attempts = max(1, self.n_retries)

    for attempt in range(total_attempts):
        try:
            metric_to_calculate.measure(test_case)
            return {
                "score": metric_to_calculate.score,
                "reason": metric_to_calculate.reason,
            }
        except Exception as e:
            # Deliberately broad: any failure of a single metric is logged
            # and retried instead of aborting the whole evaluation.
            logger.warning(
                f"Attempt {attempt + 1}/{total_attempts} failed for metric '{metric}': {e}"
            )
            if attempt < total_attempts - 1:
                # Exponential backoff: 1s, 2s, 4s, ...
                # NOTE(review): time.sleep blocks the calling thread, and
                # evaluate_answers calls this method synchronously from an
                # async function, so the event loop stalls during the wait.
                time.sleep(2**attempt)
            else:
                logger.error(
                    f"All {total_attempts} attempts failed for metric '{metric}'. Returning None values."
                )

    # Every attempt raised: report the metric as unavailable.
    return {
        "score": None,
        "reason": None,
    }
53
+
22
54
async def evaluate_answers (
23
55
self , answers : List [Dict [str , Any ]], evaluator_metrics : List [str ]
24
56
) -> List [Dict [str , Any ]]:
@@ -40,12 +72,7 @@ async def evaluate_answers(
40
72
)
41
73
metric_results = {}
42
74
for metric in evaluator_metrics :
43
- metric_to_calculate = self .g_eval_metrics [metric ]
44
- metric_to_calculate .measure (test_case )
45
- metric_results [metric ] = {
46
- "score" : metric_to_calculate .score ,
47
- "reason" : metric_to_calculate .reason ,
48
- }
75
+ metric_results [metric ] = self ._calculate_metric (metric , test_case )
49
76
results .append ({** answer , "metrics" : metric_results })
50
77
51
78
return results
0 commit comments