検出漏れ やり直し
BM25
0.75
# 0.75
/Volumes/HD/false_negative_50_jar/false_negative ❯❯❯ python3 tp_improve.py pro_result_parse/2gram-0.75.csv ../pre_result_remove/2grampre_0.75_greater.csv master ✱ ◼
12716
7234
10955
5536
3561
5216
/Volumes/HD/false_negative_50_jar/false_negative ❯❯❯ go run intersection.go pro_result_parse/2gram-0.75.csv ../pre_result_remove/2grampre_0.75_greater.csv
==> done reading from file
==> done reading from file
12716
7234
6734
# 0.5
/Volumes/HD/false_negative_50_jar/false_negative ❮❮❮ python3 tp_improve.py pro_result_parse/2gram-0.5.csv ../pre_result_remove/2grampre_0.75_greater.csv master ✱ ◼
73761
7234
67959
5536
3561
5245
/Volumes/HD/false_negative_50_jar/false_negative ❮❮❮ go run intersection.go pro_result_parse/2gram-0.5.csv ../pre_result_remove/2grampre_0.75_greater.csv
==> done reading from file
==> done reading from file
73761
7234
6758
# 0.25
/Volumes/HD/false_negative_50_jar/false_negative ❯❯❯ python3 tp_improve.py pro_result_parse/2gram-0.25.csv ../pre_result_remove/2grampre_0.75_greater.csv master ✱ ◼
1650830
7234
1375230
5536
3561
5269
/Volumes/HD/false_negative_50_jar/false_negative ❯❯❯ go run intersection.go pro_result_parse/2gram-0.25.csv ../pre_result_remove/2grampre_0.75_greater.csv
==> done reading from file
==> done reading from file
1650830
7234
6759
edit
# 0.75
/Volumes/HD/false_negative_50_jar/false_negative_edit ❮❮❮ python3 tp_improve.py pro_result_parse/2gram-0.75.csv ../pre_result_remove/2grampre_0.75_greater.csv
7692
7234
6110
5536
3561
4714
# やり直し
7234
51023
5536
6110
3561
4714
/Volumes/HD/false_negative_50_jar/false_negative_edit ❯❯❯ go run intersection.go pro_result_parse/2gram-0.75.csv ../pre_result_remove/2grampre_0.75_greater.csv
==> done reading from file
==> done reading from file
7692
7234
6126
# 0.5
/Volumes/HD/false_negative_50_jar/false_negative_edit ❮❮❮ python3 tp_improve.py pro_result_parse/2gram-0.5.csv ../pre_result_remove/2grampre_0.75_greater.csv
216657
7234
177170
5536
3561
5162
/Volumes/HD/false_negative_50_jar/false_negative_edit ❮❮❮ go run intersection.go pro_result_parse/2gram-0.5.csv ../pre_result_remove/2grampre_0.75_greater.csv
==> done reading from file
==> done reading from file
216657
7234
6676
# 0.25
/Volumes/HD/false_negative_50_jar/false_negative_edit ❯❯❯ python3 tp_improve.py pro_result_parse/2gram-0.25.csv ../pre_result_remove/2grampre_0.75_greater.csv
1393269
7234
991656
5536
367
474
/Volumes/HD/false_negative_50_jar/false_negative_edit ❯❯❯ go run intersection.go pro_result_parse/2gram-0.25.csv ../pre_result_remove/2grampre_0.75_greater.csv
==> done reading from file
==> done reading from file
7014139
7234
6759
誤検出
0.25
# 正解
2262
1132
515
372
290
/Volumes/mituba_20180425/restart/sim_0.25 ❯❯❯ for i in ./compare_result/*uc* ; do awk -F ',' '{if ($3 >= 0.75) a+=1} END {print a}' "$i" ; done | awk '{a+=$1} END {print a}'
196282
## 検索結果
/Volumes/mituba_20180425/restart/sim_0.25 ❮❮❮ for l in 2gram 3gram 4gram 5gram 6gram uc ; do for i in ./search_result/*"$l"* ; do awk -F ',' '{a+=1} END {print a}' "$i" ; done | awk '{a+=$1} END {print a}';done
817414
266231
64656
28063
14657
/Volumes/mituba_20180425/restart/sim_0.25 ❯❯❯ for i in ./search_result/*uc* ; do wc $i ;done | awk '{a+=$1} END {print a}' master ✱ ◼
583366
0.5
# 正解
/Volumes/mituba_20180425/restart/sim_0.5 ❯❯❯ for birth in 2gram 3gram 4gram 5gram 6gram uc ; do for i in ./compare_result/*"$birth"* ; do awk -F ',' '{if ($3 >= 0.75) a+=1} END {print a}' "$i" ; done | awk '{a+=$1} END {print a}';done
1480
883
461
351
282
91188
# 検索結果
/Volumes/mituba_20180425/restart/sim_0.5 ❯❯❯ for l in 2gram 3gram 4gram 5gram 6gram uc ;do for i in ./search_result/*"$l"* ; do wc $i ;done | awk '{a+=($1 - 2)} END {print a}' ;done
68028
13211
2738
1991
1717
112504
0.75
## 正解
/Volumes/mituba_20180425/restart/sim_0.75 ❯❯❯ for i in 2gram 3gram 4gram 5gram 6gram uc;do for i in ./compare_result/*"$i"* ; do awk -F ',' '{if ($3 >= 0.75) a+=1} END {print a}' "$i" ; done | awk '{a+=$1} END {print a}';done
1438
817
421
328
265
91155
## 検索結果
/Volumes/mituba_20180425/restart/sim_0.75 ❯❯❯ for l in 2gram 3gram 4gram 5gram 6gram uc ;do for i in ./search_result/*"$l"* ; do wc $i ;done | awk '{a+=($1 - 2)} END {print a}' ;done
3995
1260
839
760
523
99615
jw誤検出多すぎ問題
検索結果に0.75以上のものが出すぎ
## 閾値0.75での検索件数
1498061
## ちなみにBM25
10955
検出漏れ
0.25
/Volumes/HD/false_negative_50_jar/false_negative_jw ❯❯❯ go run intersection.go pro_result_parse/2gram-0.25.csv ../pre_result_remove/2grampre_0.75_greater.csv
==> done reading from file
==> done reading from file
9506980
7234
6759
0.5
/Volumes/HD/false_negative_50_jar/false_negative_jw ❮❮❮ go run intersection.go pro_result_parse/2gram-0.5.csv ../pre_result_remove/2grampre_0.75_greater.csv
==> done reading from file
==> done reading from file
9506769
7234
6759
0.75
/Volumes/HD/false_negative_50_jar/false_negative_jw ❯❯❯ go run intersection.go pro_result_parse/2gram-0.75.csv ../pre_result_remove/2grampre_0.75_greater.csv
==> done reading from file
==> done reading from file
1691678
7234
6758