123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106 |
- package chinese
- import (
- "bytes"
- "fmt"
- "io"
- "net/http"
- "strings"
- "time"
- "go-common/library/log"
- "github.com/go-ego/cedar"
- )
- // dict contains the Trie and dict values
- type dict struct {
- Trie *cedar.Cedar
- Values [][]string
- }
- // BuildFromFile builds the da dict from fileName
- func buildFromFile(fileName string) (*dict, error) {
- var err error
- trie := cedar.New()
- values := [][]string{}
- bs := raw(fileName)
- strs := strings.Split(string(bs), "\n")
- for _, line := range strs {
- items := strings.Split(strings.TrimSpace(line), "\t")
- if len(items) < 2 {
- continue
- }
- err = trie.Insert([]byte(items[0]), len(values))
- if err != nil {
- return nil, err
- }
- if len(items) > 2 {
- values = append(values, items[1:])
- } else {
- values = append(values, strings.Fields(items[1]))
- }
- }
- return &dict{Trie: trie, Values: values}, nil
- }
- // prefixMatch str by Dict, returns the matched string and its according values
- func (d *dict) prefixMatch(str string) (map[string][]string, error) {
- if d.Trie == nil {
- return nil, fmt.Errorf("Trie is nil")
- }
- res := make(map[string][]string)
- for _, id := range d.Trie.PrefixMatch([]byte(str), 0) {
- key, err := d.Trie.Key(id)
- if err != nil {
- return nil, err
- }
- value, err := d.Trie.Value(id)
- if err != nil {
- return nil, err
- }
- res[string(key)] = d.Values[value]
- }
- return res, nil
- }
// Settings used by raw when downloading dictionary files.
var (
	// defaultRead is the initial buffer capacity, in bytes, that raw hands
	// to readAll (16 KiB).
	defaultRead int64 = 16 << 10
	// defaultURL is the base URL that raw prepends to the requested file
	// name.
	defaultURL = "http://i0.hdslb.com/bfs/static/"
)
- func raw(file string) (bs []byte) {
- client := http.Client{Timeout: 10 * time.Second}
- for i := 0; i < 3; i++ {
- resp, err := client.Get(defaultURL + file)
- if err != nil || resp.StatusCode != http.StatusOK {
- log.Error("bfs client url:%s file:%s err:%+v", defaultURL, file, err)
- time.Sleep(time.Millisecond * 50)
- continue
- }
- defer resp.Body.Close()
- bs, err = readAll(resp.Body, defaultRead)
- if err == nil {
- return
- }
- log.Error("bfs client url:%s file:%s err:%+v", defaultURL, file, err)
- }
- return
- }
// readAll reads from r until it reports an error or EOF and returns the data
// read, using a buffer whose initial capacity is capacity bytes.
//
// If the buffer panics with bytes.ErrTooLarge while reading, the panic is
// converted into the returned error. Any other panic is re-raised unchanged.
func readAll(r io.Reader, capacity int64) (b []byte, err error) {
	sink := bytes.NewBuffer(make([]byte, 0, capacity))
	defer func() {
		rec := recover()
		if rec == nil {
			return
		}
		tooLarge, isErr := rec.(error)
		if !isErr || tooLarge != bytes.ErrTooLarge {
			// Not the buffer overflow we know how to report: propagate.
			panic(rec)
		}
		err = tooLarge
	}()
	_, err = sink.ReadFrom(r)
	return sink.Bytes(), err
}
|