Move LMS scoring under new Domain Heuristics

This commit is contained in:
Patrick Pacher
2020-08-11 10:27:16 +02:00
parent 85e4beafa1
commit 3b896ee892
5 changed files with 87 additions and 16 deletions

View File

@@ -4,16 +4,13 @@ import (
"strings"
)
// LmsScoreOfDomain calculates the mean longest meaningful substring of a domain. It follows some special rules to increase accuracy. It returns a value between 0 and 100, representing the length-based percentage of the meaningful substring.
// LmsScoreOfDomain calculates the mean longest meaningful substring of a domain.
// It follows some special rules to increase accuracy. It returns a value between
// 0 and 100, representing the length-based percentage of the meaningful substring.
func LmsScoreOfDomain(domain string) float64 {
var totalScore float64
domain = strings.ToLower(domain)
subjects := strings.Split(domain, ".")
// ignore the last two parts
if len(subjects) <= 3 {
return 100
}
subjects = subjects[:len(subjects)-3]
var totalLength int
for _, subject := range subjects {
totalLength += len(subject)
@@ -27,7 +24,9 @@ func LmsScoreOfDomain(domain string) float64 {
return totalScore
}
// LmsScore calculates the longest meaningful substring of a domain. It returns a value between 0 and 100, representing the length-based percentage of the meaningful substring.
// LmsScore calculates the longest meaningful substring of a domain. It returns a
// value between 0 and 100, representing the length-based percentage of the
// meaningful substring.
func LmsScore(subject string) float64 {
lmsStart := -1
lmsStop := -1

View File

@@ -5,8 +5,8 @@ import "testing"
func TestLmsScoreOfDomain(t *testing.T) {
testDomain(t, "g.symcd.com.", 100, 100)
testDomain(t, "www.google.com.", 100, 100)
testDomain(t, "55ttt5.12abc3.test.com.", 50, 50)
testDomain(t, "mbtq6opnuodp34gcrma65fxacgxv5ukr7lq6xuhr4mhoibe7.yvqptrozfbnqyemchpovw3q5xwjibuxfsgb72mix3znhpfhc.i2n7jh2gadqaadck3zs3vg3hbv5pkmwzeay4gc75etyettbb.isi5mhmowtfriu33uxzmgvjur5g2p3tloynwohfrggee6fkn.meop7kqyd5gwxxa3.er.spotify.com.", 0, 30)
testDomain(t, "55ttt5.12abc3.test.com.", 68, 69)
testDomain(t, "mbtq6opnuodp34gcrma65fxacgxv5ukr7lq6xuhr4mhoibe7.yvqptrozfbnqyemchpovw3q5xwjibuxfsgb72mix3znhpfhc.i2n7jh2gadqaadck3zs3vg3hbv5pkmwzeay4gc75etyettbb.isi5mhmowtfriu33uxzmgvjur5g2p3tloynwohfrggee6fkn.meop7kqyd5gwxxa3.er.spotify.com.", 0, 31)
}
func testDomain(t *testing.T, domain string, min, max float64) {