Move LMS scoring under new Domain Heuristics
This commit is contained in:
@@ -4,16 +4,13 @@ import (
|
||||
"strings"
|
||||
)
|
||||
|
||||
// LmsScoreOfDomain calculates the mean longest meaningful substring of a domain. It follows some special rules to increase accuracy. It returns a value between 0 and 100, representing the length-based percentage of the meaningful substring.
|
||||
// LmsScoreOfDomain calculates the mean longest meaningful substring of a domain.
|
||||
// It follows some special rules to increase accuracy. It returns a value between
|
||||
// 0 and 100, representing the length-based percentage of the meaningful substring.
|
||||
func LmsScoreOfDomain(domain string) float64 {
|
||||
var totalScore float64
|
||||
domain = strings.ToLower(domain)
|
||||
subjects := strings.Split(domain, ".")
|
||||
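// ignore the last three elements: the final two labels plus the empty element that strings.Split yields after a trailing dot (assumes domain is a FQDN ending in "." — TODO confirm against callers)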
|
||||
if len(subjects) <= 3 {
|
||||
return 100
|
||||
}
|
||||
subjects = subjects[:len(subjects)-3]
|
||||
var totalLength int
|
||||
for _, subject := range subjects {
|
||||
totalLength += len(subject)
|
||||
@@ -27,7 +24,9 @@ func LmsScoreOfDomain(domain string) float64 {
|
||||
return totalScore
|
||||
}
|
||||
|
||||
// LmsScore calculates the longest meaningful substring of a domain. It returns a value between 0 and 100, representing the length-based percentage of the meaningful substring.
|
||||
// LmsScore calculates the longest meaningful substring of a domain. It returns a
|
||||
// value between 0 and 100, representing the length-based percentage of the
|
||||
// meaningful substring.
|
||||
func LmsScore(subject string) float64 {
|
||||
lmsStart := -1
|
||||
lmsStop := -1
|
||||
|
||||
@@ -5,8 +5,8 @@ import "testing"
|
||||
func TestLmsScoreOfDomain(t *testing.T) {
|
||||
testDomain(t, "g.symcd.com.", 100, 100)
|
||||
testDomain(t, "www.google.com.", 100, 100)
|
||||
testDomain(t, "55ttt5.12abc3.test.com.", 50, 50)
|
||||
testDomain(t, "mbtq6opnuodp34gcrma65fxacgxv5ukr7lq6xuhr4mhoibe7.yvqptrozfbnqyemchpovw3q5xwjibuxfsgb72mix3znhpfhc.i2n7jh2gadqaadck3zs3vg3hbv5pkmwzeay4gc75etyettbb.isi5mhmowtfriu33uxzmgvjur5g2p3tloynwohfrggee6fkn.meop7kqyd5gwxxa3.er.spotify.com.", 0, 30)
|
||||
testDomain(t, "55ttt5.12abc3.test.com.", 68, 69)
|
||||
testDomain(t, "mbtq6opnuodp34gcrma65fxacgxv5ukr7lq6xuhr4mhoibe7.yvqptrozfbnqyemchpovw3q5xwjibuxfsgb72mix3znhpfhc.i2n7jh2gadqaadck3zs3vg3hbv5pkmwzeay4gc75etyettbb.isi5mhmowtfriu33uxzmgvjur5g2p3tloynwohfrggee6fkn.meop7kqyd5gwxxa3.er.spotify.com.", 0, 31)
|
||||
}
|
||||
|
||||
func testDomain(t *testing.T, domain string, min, max float64) {
|
||||
|
||||
Reference in New Issue
Block a user