検出漏れ やり直し

BM25

0.75

# 0.75
/Volumes/HD/false_negative_50_jar/false_negative ❯❯❯ python3 tp_improve.py pro_result_parse/2gram-0.75.csv ../pre_result_remove/2grampre_0.75_greater.csv      master ✱ ◼
12716
7234
10955
5536
3561
5216

/Volumes/HD/false_negative_50_jar/false_negative ❯❯❯ go run intersection.go pro_result_parse/2gram-0.75.csv ../pre_result_remove/2grampre_0.75_greater.csv
==> done reading from file
==> done reading from file
12716
7234
6734

# 0.5
/Volumes/HD/false_negative_50_jar/false_negative ❮❮❮ python3 tp_improve.py pro_result_parse/2gram-0.5.csv ../pre_result_remove/2grampre_0.75_greater.csv       master ✱ ◼
73761
7234
67959
5536
3561
5245

/Volumes/HD/false_negative_50_jar/false_negative ❮❮❮ go run intersection.go pro_result_parse/2gram-0.5.csv ../pre_result_remove/2grampre_0.75_greater.csv
==> done reading from file
==> done reading from file
73761
7234
6758

# 0.25
/Volumes/HD/false_negative_50_jar/false_negative ❯❯❯ python3 tp_improve.py pro_result_parse/2gram-0.25.csv ../pre_result_remove/2grampre_0.75_greater.csv      master ✱ ◼
1650830
7234
1375230
5536
3561
5269

/Volumes/HD/false_negative_50_jar/false_negative ❯❯❯ go run intersection.go pro_result_parse/2gram-0.25.csv ../pre_result_remove/2grampre_0.75_greater.csv
==> done reading from file
==> done reading from file
1650830
7234
6759

edit

# 0.75
/Volumes/HD/false_negative_50_jar/false_negative_edit ❮❮❮ python3 tp_improve.py pro_result_parse/2gram-0.75.csv ../pre_result_remove/2grampre_0.75_greater.csv

7692
7234
6110
5536
3561
4714

# やり直し
7234
51023
5536
6110
3561
4714

/Volumes/HD/false_negative_50_jar/false_negative_edit ❯❯❯ go run intersection.go pro_result_parse/2gram-0.75.csv ../pre_result_remove/2grampre_0.75_greater.csv
==> done reading from file
==> done reading from file
7692
7234
6126

# 0.5
/Volumes/HD/false_negative_50_jar/false_negative_edit ❮❮❮ python3 tp_improve.py pro_result_parse/2gram-0.5.csv ../pre_result_remove/2grampre_0.75_greater.csv
216657
7234
177170
5536
3561
5162

/Volumes/HD/false_negative_50_jar/false_negative_edit ❮❮❮ go run intersection.go pro_result_parse/2gram-0.5.csv ../pre_result_remove/2grampre_0.75_greater.csv
==> done reading from file
==> done reading from file
216657
7234
6676

# 0.25
/Volumes/HD/false_negative_50_jar/false_negative_edit ❯❯❯ python3 tp_improve.py pro_result_parse/2gram-0.25.csv ../pre_result_remove/2grampre_0.75_greater.csv
1393269
7234
991656
5536
367
474

/Volumes/HD/false_negative_50_jar/false_negative_edit ❯❯❯ go run intersection.go pro_result_parse/2gram-0.25.csv ../pre_result_remove/2grampre_0.75_greater.csv
==> done reading from file
==> done reading from file
7014139
7234
6759

誤検出

0.25

# 正解
2262
1132
515
372
290

/Volumes/mituba_20180425/restart/sim_0.25 ❯❯❯ for i in ./compare_result/*uc* ; do awk -F ',' '{if ($3 >= 0.75) a+=1} END {print a}' "$i" ; done | awk '{a+=$1} END {print a}'
196282

## 検索結果
/Volumes/mituba_20180425/restart/sim_0.25 ❮❮❮ for l in 2gram 3gram 4gram 5gram 6gram uc ; do for i in ./search_result/*"$l"* ; do awk -F ',' '{a+=1} END {print a}' "$i" ; done | awk '{a+=$1} END {print a}';done
817414
266231
64656
28063
14657

/Volumes/mituba_20180425/restart/sim_0.25 ❯❯❯ for i in ./search_result/*uc* ; do wc $i ;done | awk '{a+=$1} END {print a}'                                     master ✱ ◼
583366

0.5

# 正解
/Volumes/mituba_20180425/restart/sim_0.5 ❯❯❯ for birth in 2gram 3gram 4gram 5gram 6gram uc ; do for i in ./compare_result/*"$birth"* ; do awk -F ',' '{if ($3 >= 0.75) a+=1} END {print a}' "$i" ; done | awk '{a+=$1} END {print a}';done
1480
883
461
351
282
91188

# 検索結果
/Volumes/mituba_20180425/restart/sim_0.5 ❯❯❯ for l in 2gram 3gram 4gram 5gram 6gram uc ;do for i in ./search_result/*"$l"* ; do wc $i ;done | awk '{a+=($1 - 2)} END {print a}' ;done
68028
13211
2738
1991
1717
112504

0.75

## 正解
/Volumes/mituba_20180425/restart/sim_0.75 ❯❯❯ for i in 2gram 3gram 4gram 5gram 6gram uc;do for i in ./compare_result/*"$i"* ; do awk -F ',' '{if ($3 >= 0.75) a+=1} END {print a}' "$i" ; done | awk '{a+=$1} END {print a}';done
1438
817
421
328
265
91155

## 検索結果
/Volumes/mituba_20180425/restart/sim_0.75 ❯❯❯ for l in 2gram 3gram 4gram 5gram 6gram uc ;do for i in ./search_result/*"$l"* ; do wc $i ;done | awk '{a+=($1 - 2)} END {print a}' ;done
3995
1260
839
760
523
99615

jw誤検出多すぎ問題

検索結果に0.75以上のものが出すぎ

## 閾値0.75での検索件数
1498061

## ちなみにBM25
10955

検出漏れ

0.25

/Volumes/HD/false_negative_50_jar/false_negative_jw ❯❯❯ go run intersection.go pro_result_parse/2gram-0.25.csv ../pre_result_remove/2grampre_0.75_greater.csv
==> done reading from file
==> done reading from file
9506980
7234
6759

0.5


/Volumes/HD/false_negative_50_jar/false_negative_jw ❮❮❮ go run intersection.go pro_result_parse/2gram-0.5.csv ../pre_result_remove/2grampre_0.75_greater.csv
==> done reading from file
==> done reading from file
9506769
7234
6759

0.75

/Volumes/HD/false_negative_50_jar/false_negative_jw ❯❯❯ go run intersection.go pro_result_parse/2gram-0.75.csv ../pre_result_remove/2grampre_0.75_greater.csv
==> done reading from file
==> done reading from file
1691678
7234
6758