dict.go 2.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106
  1. package chinese
  2. import (
  3. "bytes"
  4. "fmt"
  5. "io"
  6. "net/http"
  7. "strings"
  8. "time"
  9. "go-common/library/log"
  10. "github.com/go-ego/cedar"
  11. )
  12. // dict contains the Trie and dict values
  13. type dict struct {
  14. Trie *cedar.Cedar
  15. Values [][]string
  16. }
  17. // BuildFromFile builds the da dict from fileName
  18. func buildFromFile(fileName string) (*dict, error) {
  19. var err error
  20. trie := cedar.New()
  21. values := [][]string{}
  22. bs := raw(fileName)
  23. strs := strings.Split(string(bs), "\n")
  24. for _, line := range strs {
  25. items := strings.Split(strings.TrimSpace(line), "\t")
  26. if len(items) < 2 {
  27. continue
  28. }
  29. err = trie.Insert([]byte(items[0]), len(values))
  30. if err != nil {
  31. return nil, err
  32. }
  33. if len(items) > 2 {
  34. values = append(values, items[1:])
  35. } else {
  36. values = append(values, strings.Fields(items[1]))
  37. }
  38. }
  39. return &dict{Trie: trie, Values: values}, nil
  40. }
  41. // prefixMatch str by Dict, returns the matched string and its according values
  42. func (d *dict) prefixMatch(str string) (map[string][]string, error) {
  43. if d.Trie == nil {
  44. return nil, fmt.Errorf("Trie is nil")
  45. }
  46. res := make(map[string][]string)
  47. for _, id := range d.Trie.PrefixMatch([]byte(str), 0) {
  48. key, err := d.Trie.Key(id)
  49. if err != nil {
  50. return nil, err
  51. }
  52. value, err := d.Trie.Value(id)
  53. if err != nil {
  54. return nil, err
  55. }
  56. res[string(key)] = d.Values[value]
  57. }
  58. return res, nil
  59. }
  // defaultRead is the initial buffer capacity, in bytes, passed to readAll
  // when a dictionary file is downloaded (16 KiB).
  var defaultRead int64 = 16 * 1024

  // defaultURL is the base URL that dictionary file names are appended to.
  var defaultURL = "http://i0.hdslb.com/bfs/static/"
  64. func raw(file string) (bs []byte) {
  65. client := http.Client{Timeout: 10 * time.Second}
  66. for i := 0; i < 3; i++ {
  67. resp, err := client.Get(defaultURL + file)
  68. if err != nil || resp.StatusCode != http.StatusOK {
  69. log.Error("bfs client url:%s file:%s err:%+v", defaultURL, file, err)
  70. time.Sleep(time.Millisecond * 50)
  71. continue
  72. }
  73. defer resp.Body.Close()
  74. bs, err = readAll(resp.Body, defaultRead)
  75. if err == nil {
  76. return
  77. }
  78. log.Error("bfs client url:%s file:%s err:%+v", defaultURL, file, err)
  79. }
  80. return
  81. }
  // readAll reads from r until EOF or an error and returns the bytes read.
  //
  // capacity is the initial size hint for the internal buffer; a negative
  // value is treated as zero. If the buffer panics with bytes.ErrTooLarge
  // while growing, the panic is converted into a returned error; any other
  // panic is propagated unchanged. When ReadFrom reports an error, the bytes
  // read up to that point are returned together with the error.
  func readAll(r io.Reader, capacity int64) (b []byte, err error) {
  	// make panics on a negative capacity with a runtime error that the
  	// recover below would re-panic, so clamp the hint first.
  	if capacity < 0 {
  		capacity = 0
  	}
  	buf := bytes.NewBuffer(make([]byte, 0, capacity))
  	defer func() {
  		e := recover()
  		if e == nil {
  			return
  		}
  		if panicErr, ok := e.(error); ok && panicErr == bytes.ErrTooLarge {
  			err = panicErr
  			return
  		}
  		panic(e)
  	}()
  	_, err = buf.ReadFrom(r)
  	return buf.Bytes(), err
  }