-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathevaluatePerformance.py
More file actions
79 lines (58 loc) · 2.05 KB
/
evaluatePerformance.py
File metadata and controls
79 lines (58 loc) · 2.05 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
# -*- coding: utf-8 -*-
"""
Created on Sun Apr 10 20:00 2016
@author: Federico Zarfati
"""
import pandas as pd
import sys
import numpy as np
import os
import pylab
import csv
from docutils.utils.punctuation_chars import delimiters
from fileinput import close
# NOTE(review): the return value of os.getcwd() is discarded, so this call has
# no visible effect in this file.
os.getcwd()
# Module-level holder for the averaged P@k scores. It starts as None; main()
# rebinds it to a fresh list before avg_p_at_k() adds its results to it.
values = None
def avg_p_at_k(output, ground):
    """Compute the average P@k for k in (1, 3, 5, 10) and log it to Report.csv.

    For every query in ``ground`` the top-k retrieved documents are
    intersected with the query's relevant documents, and the size of that
    intersection is divided by ``min(k, number of relevant documents)``.
    The per-query values are then averaged over the evaluated queries,
    yielding one score per k.

    Args:
        output: Mapping from query id to the ranked list of retrieved
            document ids. A query that is absent from this mapping is
            treated as having returned no results (precision 0).
        ground: Mapping from query id to the list of relevant document ids.
            Queries whose relevant list is empty are skipped, because the
            normalised precision is undefined for them.

    Returns:
        A list with the four averages, ordered by k = 1, 3, 5, 10. An
        average is 0.0 when no query could be evaluated.

    Side effects:
        Appends one row holding the four averages to ``Report.csv`` in the
        current working directory, and adds the same four numbers to the
        module-level ``values`` list (created when it is missing or None).
    """
    global values

    scores = []
    for k in (1, 3, 5, 10):
        total = 0.0
        evaluated = 0
        for query, relevant in ground.items():
            n_relevant = len(relevant)
            if n_relevant == 0:
                # min(k, 0) would be a zero denominator; nothing to score.
                continue
            # A query with no retrieved documents simply scores zero hits.
            top_k = set(output.get(query, ())[:k])
            hits = len(top_k & set(relevant))
            total += hits / float(min(k, n_relevant))
            evaluated += 1
        scores.append(total / evaluated if evaluated else 0.0)

    # Keep the legacy module-level accumulator in sync for existing callers,
    # without requiring that main() initialised it first.
    legacy = globals().get("values")
    if legacy is None:
        values = list(scores)
    else:
        legacy.extend(scores)

    # Write only this call's scores so every row has exactly four columns.
    # newline="" lets the csv module control line endings itself.
    with open("Report.csv", "a", newline="") as fp:
        csv.writer(fp, dialect='excel').writerow(scores)

    return scores
def main():
    """Evaluate a run file against a ground-truth file given on the command line.

    ``sys.argv[1]`` is the tab-separated results file (columns ``Query_ID``
    and ``Doc_ID``); ``sys.argv[2]`` is the tab-separated ground-truth file
    (columns ``Query_id`` and ``Relevant_Doc_id``). Both are grouped by
    query and handed to ``avg_p_at_k``.
    """
    global values
    values = []

    # Load both inputs, results first and ground truth second.
    results_frame = pd.read_csv(sys.argv[1], sep="\t")
    truth_frame = pd.read_csv(sys.argv[2], sep="\t")

    # Partition the rows of each table by their query identifier.
    results_by_query = results_frame.groupby('Query_ID')
    truth_by_query = truth_frame.groupby('Query_id')

    # Query id -> list of retrieved document ids, in file order.
    retrieved = {}
    for query_id, rows in results_by_query:
        retrieved[query_id] = rows["Doc_ID"].tolist()

    # Query id -> list of relevant document ids.
    relevant = {}
    for query_id, rows in truth_by_query:
        relevant[query_id] = rows["Relevant_Doc_id"].tolist()

    # Score the run and record the averages.
    avg_p_at_k(retrieved, relevant)
# Run the evaluation only when this file is executed as a script.
if __name__ == "__main__":
    main()