This repository has been archived on 2025-09-28. You can view files and clone it, but you cannot make any changes to its state, such as pushing and creating new issues, pull requests or comments.
plaintext-encyclopedia/database.go

98 lines
2.7 KiB
Go
Raw Normal View History

package main
2024-08-28 20:29:34 +02:00
// Copyright (c) 2024 Julian Müller (ChaoticByte)
import (
"io/fs"
"log"
"os"
2024-08-28 19:49:23 +02:00
"slices"
"strings"
2024-08-28 19:49:23 +02:00
"golang.org/x/text/search"
)
type Database struct {
Titles map[string]string
Entries map[string]string
2024-08-28 19:49:23 +02:00
matcher *search.Matcher
}
func (db *Database) searchForIds(query string) []string { // returns keys (entry names)
2024-08-28 19:49:23 +02:00
results := []string{}
// compile patterns
queryPatterns := []*search.Pattern{}
for _, q := range strings.Split(query, " ") { // per word
queryPatterns = append(queryPatterns, db.matcher.CompileString(q))
}
// search
for k, v := range db.Entries {
2025-07-11 21:33:52 +02:00
// title (k)
titleLower := strings.ToLower(db.Titles[k])
queryLower := strings.ToLower(query)
if strings.Contains(titleLower, queryLower) {
2025-07-11 21:33:52 +02:00
results = append(results, k)
continue
}
// content body
2024-08-28 19:49:23 +02:00
patternsFound := 0
for _, p := range queryPatterns {
if s, _ := p.IndexString(v); s != -1 {
2024-08-28 19:49:23 +02:00
patternsFound++ // this pattern was found
}
}
if patternsFound == len(queryPatterns) && !slices.Contains(results, k) {
// if all patterns were found, add the key (entry name) to the list
results = append(results, k)
}
}
slices.Sort(results)
2024-08-28 19:49:23 +02:00
return results
}
func BuildDB(directory string) Database {
logger := log.Default()
logger.Println("Building database")
// files, entries
var files []string
entries := map[string]string{}
// get files in directory and read them
2024-08-28 19:49:23 +02:00
directory = strings.TrimRight(directory, "/") // we don't need that last /, don't use the root directory /
entriesDirFs := os.DirFS(directory)
files, err := fs.Glob(entriesDirFs, "*.txt")
if err != nil { logger.Panicln(err) }
titles := map[string]string{}
for _, f := range files {
k := f[:len(f)-4] // remove ".txt"
k = strings.ReplaceAll(k, "|", "_") // we don't want | because it is used in the search protocol
fileData, err := os.ReadFile(directory + "/" + f)
if err != nil { logger.Panicln(err) }
content := string(fileData)
content = strings.Trim(content, "\n\r ")
if strings.HasPrefix(content, "Title:") {
len_content := len(content)
title_start_idx := 6
var title_stop_idx int
if strings.Contains(content, "\n") {
title_stop_idx = strings.Index(content, "\n")
} else {
title_stop_idx = len_content
}
title := content[title_start_idx:title_stop_idx]
title = strings.Trim(title, " ")
body := content[title_stop_idx:len_content]
body = strings.TrimLeft(body, "\n\r")
if len(body) < 1 {
body = " "
}
titles[k] = title
entries[k] = body
} else {
titles[k] = k
entries[k] = content
}
}
2024-08-28 19:49:23 +02:00
matcher := search.New(ContentLanguage, search.IgnoreCase, search.IgnoreDiacritics)
return Database{Titles: titles, Entries: entries, matcher: matcher}
}