Compare commits

...

15 Commits
master ... dev

Author SHA1 Message Date
suguo.yao f641af4bc4 dictionary 2022-10-30 21:15:37 +08:00
suguo.yao 001af90911 cgo环境中没有 2022-10-30 18:03:51 +08:00
suguo.yao 1857b04aa5 文件找不到是因为编译出的版本问题 2022-10-30 18:01:07 +08:00
suguo.yao e38e2ea84a 文件还是找不到 2022-10-30 17:52:42 +08:00
suguo.yao 707bd87d85 文件找不到 2022-10-30 17:47:28 +08:00
suguo.yao 225a86b260 修改dockerfile 2022-10-30 16:54:46 +08:00
suguo.yao 9df3243900 配置从 os.Getenv获取 2022-10-30 16:38:28 +08:00
suguo.yao 335d424cc8 from 2022-10-30 15:02:07 +08:00
suguo.yao 83301d29ee 位置错了 2022-10-30 14:56:51 +08:00
suguo.yao 04d2f294ae 生成gofoudn 2022-10-30 14:54:40 +08:00
suguo.yao 5f4c22aaac 调整app 2022-10-30 14:48:08 +08:00
suguo.yao 130d8f03dc bug fix: dbs 2022-10-28 23:34:24 +08:00
suguo.yao 26fa21ddaf 分词工具调整为gse 2022-10-28 23:26:05 +08:00
suguo.yao 623cac96df tewt 2022-10-28 16:22:56 +08:00
suguo.yao 8007cdf5cb 改造中 2022-10-28 12:23:32 +08:00
55 changed files with 484 additions and 1618 deletions

View File

@ -1,4 +1,4 @@
FROM golang:1.18 as builder
FROM golang:1.19-alpine as builder
ENV GO111MODULE=on \
GOPROXY=https://goproxy.io
@ -6,23 +6,25 @@ ENV GO111MODULE=on \
COPY . /app
WORKDIR /app
RUN go get && go build -ldflags="-s -w" -installsuffix cgo
RUN go get && go build -ldflags="-s -w" -o gofound .
FROM debian:buster-slim
FROM harbor.ks.easyj.top/zt/alpine:0.1
ENV TZ=Asia/Shanghai \
LANG=C.UTF-8 \
APP_DIR=/usr/local/go_found
LABEL "authors"="suguo.yao"
LABEL "email"="ysg@myschools.me"
ENV APP_DIR=/app \
addr=0.0.0.0 \
port=5678 \
data="/data/gofound" \
dictionary="/data/gofound"
COPY --from=builder /app/gofound ${APP_DIR}/gofound
COPY --from=builder /app/config.yaml ${APP_DIR}/config.yaml
WORKDIR ${APP_DIR}
RUN ln -snf /usr/share/zoneinfo/${TZ} /etc/localtime \
&& echo ${TZ} > /etc/timezone \
&& chmod +x gofound
EXPOSE 5678
CMD ["./gofound","-config","config.yaml"]
RUN chmod +x gofound
CMD ["./gofound"]

View File

@ -1,6 +1,6 @@
@url=http://admin:123456@192.168.0.254:5678/api
# @url=http://admin:123456@192.168.0.254:5678/api
# @url=http://192.168.0.254:5678/api
# @url=http://127.0.0.1:5678/api
@url=http://127.0.0.1:5678/api
###增加/修改索引
POST {{url}}/index?database=aaa HTTP/1.1
@ -33,7 +33,7 @@ POST {{url}}/index/batch?database=default HTTP/1.1
[{
"id": 88887,
"text": "深圳南站",
"text": "深圳南站 Systems Engineering Services",
"document": {
"title": "阿森松岛所445",
"number": 223
@ -58,7 +58,7 @@ POST {{url}}/index/remove?database=default HTTP/1.1
POST http://192.168.0.254:5678/ HTTP/1.1
{
"query":"融七牛",
"query":"Engineering",
"page":1,
"limit":10,
"order": "DESC",
@ -72,7 +72,7 @@ POST http://192.168.0.254:5678/ HTTP/1.1
POST {{url}}/query HTTP/1.1
{
"query":"融七牛",
"query":"Engineering",
"page":1,
"limit":10,
"order": "DESC",

View File

@ -1,10 +1,11 @@
#监听地址
addr: 0.0.0.0:5678
addr: 0.0.0.0
port: 5678
#数据目录
data: ./data
#词典目录
dictionary: ./data/dictionary.txt
dictionary:
# 最大线程数
gomaxprocs: 4

View File

@ -1,88 +0,0 @@
package core
import (
"context"
"fmt"
"log"
"net/http"
"os"
"os/signal"
"syscall"
"time"
"myschools.me/suguo/gofound/global"
"myschools.me/suguo/gofound/searcher"
"myschools.me/suguo/gofound/searcher/words"
"myschools.me/suguo/gofound/web/controller"
"myschools.me/suguo/gofound/web/router"
)
// NewContainer builds a searcher.Container from the global configuration and
// the given tokenizer and initializes it. It panics if initialization fails.
func NewContainer(tokenizer *words.Tokenizer) *searcher.Container {
	cfg := global.CONFIG

	c := new(searcher.Container)
	c.Dir = cfg.Data
	c.Debug = cfg.Debug
	c.Tokenizer = tokenizer
	c.Shard = cfg.Shard
	c.Timeout = cfg.Timeout
	c.BufferNum = cfg.BufferNum

	err := c.Init()
	if err != nil {
		panic(err)
	}
	return c
}
// NewTokenizer creates a words.Tokenizer for the dictionary at dictionaryPath.
func NewTokenizer(dictionaryPath string) *words.Tokenizer {
	tokenizer := words.NewTokenizer(dictionaryPath)
	return tokenizer
}
// Initialize parses the configuration, builds the tokenizer and the search
// container, starts the HTTP server and then blocks until SIGINT or SIGTERM
// arrives, after which the server is shut down with a 5-second deadline.
//
// A panic raised after the configuration has been parsed is recovered and
// printed instead of crashing the process.
func Initialize() {
	global.CONFIG = Parser()

	defer func() {
		if r := recover(); r != nil {
			// recover returns an arbitrary value; %v prints any of them
			// correctly, whereas %s garbles values that are not strings,
			// errors or Stringers.
			fmt.Printf("panic: %v\n", r)
		}
	}()

	// Set up the tokenizer and the container that owns the databases.
	tokenizer := NewTokenizer(global.CONFIG.Dictionary)
	global.Container = NewContainer(tokenizer)

	// Set up the business-logic services.
	controller.NewServices()

	// Register the routes.
	r := router.SetupRouter()

	// Start the server.
	srv := &http.Server{
		Addr:    global.CONFIG.Addr,
		Handler: r,
	}

	go func() {
		// Serve in a separate goroutine so this one can wait for a signal.
		if err := srv.ListenAndServe(); err != nil && err != http.ErrServerClosed {
			log.Println("listen:", err)
		}
	}()

	// Graceful shutdown: wait for an interrupt or termination signal.
	quit := make(chan os.Signal, 1)
	signal.Notify(quit, syscall.SIGINT, syscall.SIGTERM)
	<-quit
	log.Println("Shutdown Server ...")

	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()
	if err := srv.Shutdown(ctx); err != nil {
		log.Println("Server Shutdown:", err)
	}

	log.Println("Server exiting")
}

View File

@ -1,69 +0,0 @@
package core
import (
"flag"
"fmt"
"os"
"runtime"
"gopkg.in/yaml.v2"
"myschools.me/suguo/gofound/global"
)
// Parser builds the server configuration from the command line.
//
// When -config names a file, that YAML file is read and decoded and every
// other flag is ignored. Otherwise the configuration is assembled from the
// individual flag values. It panics if the file cannot be read or decoded.
func Parser() *global.Config {
	// Build the default data directory with the platform path separator so
	// that it is also valid on Windows.
	defaultData := fmt.Sprintf(".%sdata", string(os.PathSeparator))

	var (
		addr           = flag.String("addr", "127.0.0.1:5678", "设置监听地址和端口")
		dataDir        = flag.String("data", defaultData, "设置数据存储目录")
		debug          = flag.Bool("debug", true, "设置是否开启调试模式")
		dictionaryPath = flag.String("dictionary", "./data/dictionary.txt", "设置词典路径")
		enableAdmin    = flag.Bool("enableAdmin", true, "设置是否开启后台管理")
		gomaxprocs     = flag.Int("gomaxprocs", runtime.NumCPU()*2, "设置GOMAXPROCS")
		auth           = flag.String("auth", "", "开启认证,例如: admin:123456")
		enableGzip     = flag.Bool("enableGzip", true, "是否开启gzip压缩")
		timeout        = flag.Int64("timeout", 10*60, "数据库超时关闭时间(秒)")
		bufferNum      = flag.Int("bufferNum", 1000, "分片缓冲数量")
		configPath     = flag.String("config", "", "配置文件路径,配置此项其他参数忽略")
	)
	flag.Parse()

	// No configuration file: take everything from the flags.
	if *configPath == "" {
		return &global.Config{
			Addr:        *addr,
			Data:        *dataDir,
			Debug:       *debug,
			Dictionary:  *dictionaryPath,
			EnableAdmin: *enableAdmin,
			Gomaxprocs:  *gomaxprocs,
			Auth:        *auth,
			EnableGzip:  *enableGzip,
			Timeout:     *timeout,
			BufferNum:   *bufferNum,
		}
	}

	// os.ReadFile replaces the deprecated ioutil.ReadFile; see
	// https://github.com/golang/go/issues/42026.
	raw, err := os.ReadFile(*configPath)
	if err != nil {
		panic(err)
	}

	loaded := &global.Config{}
	if err = yaml.Unmarshal(raw, loaded); err != nil {
		panic(err)
	}
	return loaded
}

View File

@ -1,7 +0,0 @@
# 待办
+ 增加多库
+ 增加配置
+ 增加Web控制台
ta shuo d

View File

@ -1,406 +0,0 @@
# API
`gofound`启动之后会监听一个TCP端口接收来自客户端的搜索请求。处理http请求部分使用`gin`框架。
## 多数据库支持
从1.1版本开始我们支持了多数据库API接口中通过get参数来指定数据库。
如果不指定,默认数据库为`default`。
如:`api/index?database=db1` 其他post参数不变
如果指定的数据库名不存在,将会自动创建一个新的数据库。如果需要删除,直接删除该数据库目录,然后重启gofound即可。
## 增加/修改索引
| 接口地址 | /api/index |
|------|------------------|
| 请求方式 | POST |
| 请求类型 | application/json |
### 请求
| 字段 | 类型 | 必选 | 描述 |
|----------|--------|-----|-----------------------------------|
| id | uint32 | 是 | 文档的主键id,需要保持唯一性,如果id重复,将会覆盖之前的文档。 |
| text | string | 是 | 需要索引的文本块 |
| document | object | 是 | 附带的文档数据json格式搜索的时候原样返回 |
+ POST /api/index
```json
{
"id": 88888,
"text": "深圳北站",
"document": {
"title": "阿森松岛所445",
"number": 223
}
}
```
+ 命令行
```bash
curl -H "Content-Type:application/json" -X POST --data '{"id":88888,"text":"深圳北站","document":{"title":"阿森松岛所445","number":223}}' http://127.0.0.1:5678/api/index
```
### 响应
```json
{
"state": true,
"message": "success"
}
```
## 批量增加/修改索引
| 接口地址 | /api/index/batch |
|------|------------------|
| 请求方式 | POST |
| 请求类型 | application/json |
参数与单个一致只是需要用数组包裹多个json对象例如
```json
[{
"id": 88888,
"text": "深圳北站",
"document": {
"title": "阿森松岛所445",
"number": 223
}
},{
"id": 22222,
"text": "北京东站",
"document": {
"title": "123123123",
"number": 123123
}
}]
```
## 删除索引
| 接口地址 | /api/index/remove |
| -------- | ----------------- |
| 请求方式 | POST |
| 请求类型 | application/json |
### 请求
| 字段 | 类型 | 必选 | 描述 |
|-----|--------|-----|---------|
| id | uint32 | 是 | 文档的主键id |
+ POST /api/index/remove
```json
{
"id": 88888
}
```
+ 命令行
```bash
curl -H "Content-Type:application/json" -X POST --data '{"id":88888}' http://127.0.0.1:5678/api/index/remove
```
### 响应
```json
{
"state": true,
"message": "success"
}
```
## 查询索引
`GoFound`提供了一种查询方式按照文本查询。与其他Nosql数据库不同`GoFound`不支持按照文档的其他查询。
| 接口地址 | /api/query |
|------|------------------|
| 请求方式 | POST |
| 请求类型 | application/json |
### 请求
| 字段 | 类型 | 必选 | 描述 |
| --------- | ------ | ---- | ------------------------------------------------------------ |
| query | string | 是 | 查询的关键词都是or匹配 |
| page | int | 否 | 页码默认为1 |
| limit | int | 否 | 返回的文档数量默认为100没有最大限制最好不要超过1000超过之后速度会比较慢内存占用会比较多 |
| order | string | 否 | 排序方式,取值`asc`和`desc`,默认为`desc`按id排序然后根据结果得分排序 |
| highlight | object | 否 | 关键字高亮相对text字段中的文本 |
| scoreExp | string | 否 | 根据文档的字段计算分数然后再进行排序例如score+[document.hot]*10表达式中score为关键字的分数,document.hot为document中的hot字段 |
### highlight
> 配置以后符合条件的关键词将会被preTag和postTag包裹
| 字段 | 描述 |
|---------|-------|
| preTag | 关键词前缀 |
| postTag | 关键词后缀 |
+ 示例
```json
{
"query": "上海哪里好玩",
"page": 1,
"limit": 10,
"order": "desc",
"highlight": {
"preTag": "<span style='color:red'>",
"postTag": "</span>"
}
}
```
+ POST /api/query
```json
{
"query": "深圳北站",
"page": 1,
"limit": 10,
"order": "desc"
}
```
+ 命令行
```bash
curl -H "Content-Type:application/json" -X POST --data '{"query":"深圳北站","page":1,"limit":10,"order":"desc"}' http://127.0.0.1:5678/api/query
```
### 响应
| 字段 | 类型 | 描述 |
|-----------|---------|-------------------------|
| time | float32 | 搜索文档用时 |
| total | int | 符合条件的数量 |
| pageCount | int | 页总数 |
| page | int | 当前页码 |
| limit | int | 每页数量 |
| documents | array | 文档列表,[参考索引文档](#增加/修改索引) |
```json
{
"state": true,
"message": "success",
"data": {
"time": 2.75375,
"total": 13487,
"pageCount": 1340,
"page": 1,
"limit": 10,
"documents": [
{
"id": 1675269553,
"text": "【深圳消费卡/购物券转让/求购信息】- 深圳赶集网",
"document": {
"id": "8c68e948de7c7eb4362de15434a3ace7",
"title": "【深圳消费卡/购物券转让/求购信息】- 深圳赶集网"
},
"score": 3
},
{
"id": 88888,
"text": "深圳北站",
"document": {
"number": 223,
"title": "阿森松岛所445"
},
"score": 2
},
{
"id": 212645608,
"text": "【深圳美容美发卡转让/深圳美容美发卡求购信息】- 深圳赶集网",
"document": {
"id": "d3ce16b68a90833cbc20b8a49e93b9cd",
"title": "【深圳美容美发卡转让/深圳美容美发卡求购信息】- 深圳赶集网"
},
"score": 1.5
},
{
"id": 1191140208,
"text": "【深圳赶集网】-免费发布信息-深圳分类信息门户",
"document": {
"id": "44be60a1d8b54c431e5511804062ae62",
"title": "【深圳赶集网】-免费发布信息-深圳分类信息门户"
},
"score": 1.5
},
{
"id": 4133884907,
"text": "【深圳购物卡转让/深圳购物卡求购信息】- 深圳赶集网",
"document": {
"id": "f25bb8136e8c2b02e3fcd65627a9ddbc",
"title": "【深圳购物卡转让/深圳购物卡求购信息】- 深圳赶集网"
},
"score": 1
},
{
"id": 206909132,
"text": "【沙嘴门票/电影票转让/求购信息】- 深圳赶集网",
"document": {
"id": "63ca3ea4ffd254454e738a0957efedc2",
"title": "【沙嘴门票/电影票转让/求购信息】- 深圳赶集网"
},
"score": 1
},
{
"id": 220071473,
"text": "【深圳健身卡转让/深圳健身卡求购信息】- 深圳赶集网",
"document": {
"id": "72d3d650c8a8a4e73b89b406f6dc76ef",
"title": "【深圳健身卡转让/深圳健身卡求购信息】- 深圳赶集网"
},
"score": 1
},
{
"id": 461974720,
"text": "铁路_论坛_深圳热线",
"document": {
"id": "73c96ac2c23bc0cb4fb12ce7660c8b35",
"title": "铁路_论坛_深圳热线"
},
"score": 1
},
{
"id": 490922879,
"text": "【深圳购物卡转让/深圳购物卡求购信息】- 深圳赶集网",
"document": {
"id": "93be0f35c484ddcd8c83602e27535d96",
"title": "【深圳购物卡转让/深圳购物卡求购信息】- 深圳赶集网"
},
"score": 1
},
{
"id": 525810194,
"text": "【深圳购物卡转让/深圳购物卡求购信息】- 深圳赶集网",
"document": {
"id": "e489dd19dce0de2c9f4e59c969ec9ec0",
"title": "【深圳购物卡转让/深圳购物卡求购信息】- 深圳赶集网"
},
"score": 1
}
],
"words": [
"深圳",
"北站"
]
}
}
```
## 查询状态
| 接口地址 | /api/status |
|------|------------------|
| 请求方式 | GET |
### 请求
```bash
curl http://127.0.0.1:5678/api/status
```
### 响应
```json
{
"state": true,
"message": "success",
"data": {
"index": {
"queue": 0,
"shard": 10,
"size": 531971
},
"memory": {
"alloc": 1824664656,
"heap": 1824664656,
"heap_idle": 10008625152,
"heap_inuse": 2100068352,
"heap_objects": 3188213,
"heap_released": 9252003840,
"heap_sys": 12108693504,
"sys": 12700504512,
"total": 11225144273040
},
"status": "ok",
"system": {
"arch": "arm64",
"cores": 10,
"os": "darwin",
"version": "go1.18"
}
}
}
```
## 删除数据库
| 接口地址 | /api/db/drop |
| -------- | ------------ |
| 请求方式 | GET |
### 请求
```bash
curl http://127.0.0.1:5678/api/db/drop?database=db_name
```
### 响应
```json
{
"state": true,
"message": "success"
}
```
## 在线分词
| 接口地址 | /api/word/cut |
|------|-----------------|
| 请求方式 | GET |
### 请求参数
| 字段 | 类型 | 必选 | 描述 |
|-----|--------|-----|-----|
| q | string | 是 | 关键词 |
### 请求
```bash
curl http://127.0.0.1:5678/api/word/cut?q=上海和深圳哪个城市幸福指数高
```
### 响应
```json
{
"state": true,
"message": "success",
"data": [
"上海",
"深圳",
"哪个",
"城市",
"幸福",
"指数"
]
}
```

View File

@ -1,59 +0,0 @@
# 编译
`gofound` 基于`golang-1.18`,编译之前需要安装对应的golang版本。
推荐使用编译好的[二进制文件](https://github.com/newpanjing/gofound/releases)
## Admin
> 如果需要Admin部分请先构建adminadmin基于vue+element-ui+vite而这些也需要安装nodejs
构建命令:
```shell
cd ./web/admin/assets/web/
npm install
npm run build
```
完成以上步骤之后才能使用admin
## 编译
```shell
go get
go build -o gofound
```
## 依赖
```shell
go 1.18
require (
github.com/emirpasic/gods v1.12.0
github.com/gin-gonic/gin v1.7.7
github.com/yanyiwu/gojieba v1.1.2
)
require (
github.com/gin-contrib/sse v0.1.0 // indirect
github.com/go-playground/locales v0.13.0 // indirect
github.com/go-playground/universal-translator v0.17.0 // indirect
github.com/go-playground/validator/v10 v10.4.1 // indirect
github.com/golang/protobuf v1.3.3 // indirect
github.com/golang/snappy v0.0.0-20180518054509-2e65f85255db // indirect
github.com/json-iterator/go v1.1.9 // indirect
github.com/leodido/go-urn v1.2.0 // indirect
github.com/mattn/go-isatty v0.0.12 // indirect
github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421 // indirect
github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742 // indirect
github.com/syndtr/goleveldb v1.0.0 // indirect
github.com/ugorji/go/codec v1.1.7 // indirect
golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9 // indirect
golang.org/x/sys v0.0.0-20210823070655-63515b42dcdf // indirect
gopkg.in/yaml.v2 v2.2.8 // indirect
)
```

View File

@ -1,126 +0,0 @@
# 配置
在编译好[gofound](./compile.md)之后,就可以启动了。
```shell
./gofound
```
## 参数
```shell
./gofound -h
-addr string
设置监听地址和端口 (default "127.0.0.1:5678")
-auth string
开启认证,例如: admin:123456
-config string
配置文件路径,配置此项其他参数忽略
-data string
设置数据存储目录 (default "./data")
-debug
设置是否开启调试模式 (default true)
-dictionary string
设置词典路径 (default "./data/dictionary.txt")
-enableAdmin
设置是否开启后台管理 (default true)
-enableGzip
是否开启gzip压缩 (default true)
-gomaxprocs int
设置GOMAXPROCS (default 20)
-timeout int
数据库超时关闭时间(秒) (default 600)
```
### addr
指定要监听的地址和端口。默认为`127.0.0.1:5678` 监听本地地址。
```shell
./gofound --addr=127.0.0.1:5678
./gofound --addr=:5678
./gofound --addr=0.0.0.0:5678
./gofound --addr=192.168.1.1:5678
```
### auth
设置admin和api接口的用户名密码采用basic auth
```shell
./gofound --auth=admin:123456
```
### data
指定索引数据存储的目录,可以是相对路径,也可以是绝对路径。
相对路径是存在`gofound`所在目录下的。
```shell
./gofound --data=./data
./gofound --data=/www/data
```
### debug
设置是否开启调试模式。默认为`true`。
```shell
./gofound --debug=false
```
### dictionary
设置自定义词典路径。默认为`./data/dictionary.txt`。
```shell
./gofound --dictionary=./data/dictionary.txt
```
### enableAdmin
设置是否开启后台管理。默认为`true`。
```shell
./gofound --enableAdmin=false
```
### enableGzip
设置是否开启gzip压缩。默认为`true`。
```shell
./gofound --enableGzip=false
```
### gomaxprocs
设置GOMAXPROCS。默认为CPU数量X2。
```shell
./gofound --gomaxprocs=10
```
### shard
设置文件分片数量。默认为`10`。分片越多查询会越快相反的磁盘IO和CPU会越多。
```shell
./gofound --shard=10
```
### timeout
单位为秒。默认为600秒。
数据库超时关闭时间,如果设置为-1表示永不关闭适合频繁查询的。如果时间过久会造成内存占用过多
```shell
./gofound --timeout=600
```

View File

@ -1,13 +0,0 @@
# 示例
编译好的下载地址:
[https://github.com/newpanjing/gofound/releases](https://github.com/newpanjing/gofound/releases)
将编译后的`gofound`文件复制到`/usr/local/bin`目录下,然后在命令行中运行`gofound`命令即可。
```shell
gofound --addr=:5678 --data=./data
```
启动成功后,就可以调用[API](./api.md)来进行索引和查询了。

View File

@ -1,14 +0,0 @@
# 索引原理和流程
`gofound` 采用平衡二叉树对文本关键词进行索引,然后利用`leveldb`存储id值以及对应的文档。
## 原理图
![](./images/index.png)
## 二叉平衡查找树
二叉平衡查找树是一个高效的查找树,它的查找速度是`O(log n)`,并且每个节点的子树都是平衡的。
`gofound`默认是分10个文件块也就是10个平衡查找树每个平衡查找树的深度是`log10(n)`。
1亿条索引在一棵树查找最大26次,如果10亿数据,最大查找也是26次,会根据key的hash值取模shard数量来找到对应的索引进行检索。

View File

@ -1,11 +0,0 @@
# GoFound发布日志
## 1.1
+ 优化内存占用
+ 提升查询速度
+ 增加自定义词库配置
+ 增加Admin界面
+ 增加认证功能
## 1.0.2
+ 完成基础功能

View File

@ -1,20 +0,0 @@
# 持久化
持久化采用golang版本的leveldb
+ 关键词与ID映射
二叉树的每个关键词都与ID相关联这样在搜索的时候可以先找到索引的key然后在通过key找到对应的id数组。
映射文件采用的是`leveldb`存储,编码格式为`gob`
[查看源码](../searcher/storage/leveldb_storage.go)
+ 文档
文档是指在索引时传入的数据,在搜索的时候会原样返回。
存储文件采用的是leveldb存储编码格式为gob
[查看源码](../searcher/storage/leveldb_storage.go)

View File

@ -1,17 +0,0 @@
# 内存
我们的目标是以最小的内存使用和最大的性能,带来非凡的体验。
测试以1000万数据为基数。
## 内存理论
索引100亿条数据只需要27.3MB的内存经过bitmap的压缩磁盘空间与1.0x一致。
查询100亿条搜索结果只需要27.3MB的内存经过bitmap的压缩
相比1.0x版本,内存可以减少 2794.43倍。同时不兼容1.0x版本的索引,需要重新索引一次。
## 查询理论
索引时间相比1.0x减少33.8倍查询时间比1.0x快23%。如果数据超过亿级查询速度比1.0x版本慢3倍。

12
gin/config.go Normal file
View File

@ -0,0 +1,12 @@
package gin
// Config holds the settings used by Service to start the gin HTTP server.
type Config struct {
// RootPath is the URL prefix under which the API routes are mounted.
RootPath string
// Addr is the host part of the listen address.
Addr string
// Port is the TCP port of the listen address.
Port int
// Ssl selects serving over TLS instead of plain HTTP.
Ssl bool
// SslPem is the certificate file handed to ListenAndServeTLS.
SslPem string
// SslKey is the private-key file handed to ListenAndServeTLS.
SslKey string
// EnableGzip turns on the gzip response-compression middleware.
EnableGzip bool
}

View File

@ -1,4 +1,4 @@
package middleware
package gin
import (
"net/http"

View File

@ -1,10 +1,9 @@
package middleware
package gin
import (
"runtime/debug"
"github.com/gin-gonic/gin"
"myschools.me/suguo/gofound/web"
)
// Exception 处理异常
@ -13,7 +12,7 @@ func Exception() gin.HandlerFunc {
defer func() {
if err := recover(); err != nil {
debug.PrintStack()
c.JSON(200, web.Error(err.(error).Error()))
// c.JSON(200, web.Error(err.(error).Error()))
}
c.Abort()
}()

71
gin/gin.go Normal file
View File

@ -0,0 +1,71 @@
package gin
import (
"fmt"
"log"
"net/http"
"strconv"
"time"
"github.com/gin-contrib/gzip"
"github.com/gin-gonic/gin"
"github.com/unrolled/secure"
)
// Service starts the gin HTTP(S) server in a background goroutine and
// returns immediately.
//
// A nil conf selects plain HTTP on 0.0.0.0:80 with the routes mounted at "/".
// With conf.Ssl set, the server is started through ListenAndServeTLS using
// conf.SslPem and conf.SslKey, and a serve failure terminates the process via
// log.Fatal. Without it, a listen error is only logged.
func Service(conf *Config) {
	if conf == nil {
		conf = &Config{
			RootPath: "/",
			Addr:     "0.0.0.0",
			Port:     80,
			Ssl:      false,
			SslPem:   "server.pem",
			SslKey:   "server.key",
		}
	}

	go func() {
		router := gin.New()
		if conf.EnableGzip {
			router.Use(gzip.Gzip(gzip.DefaultCompression))
		}
		// Middleware has to be attached before the routes are registered:
		// gin copies the current handler chain into each route when it is
		// added, so a later Use never reaches routes that already exist.
		if conf.Ssl {
			router.Use(tlsHandler(conf))
		}
		routerSetup(router, &conf.RootPath)

		addr := fmt.Sprintf("%s:%d", conf.Addr, conf.Port)
		s := &http.Server{
			Addr:           addr,
			Handler:        router,
			ReadTimeout:    10 * time.Second,
			WriteTimeout:   10 * time.Second,
			MaxHeaderBytes: 1 << 20,
		}
		log.Printf("start service on %s", addr)

		if conf.Ssl {
			log.Fatal(s.ListenAndServeTLS(conf.SslPem, conf.SslKey))
		} else {
			if err := s.ListenAndServe(); err != nil && err != http.ErrServerClosed {
				log.Println("listen:", err)
			}
		}
	}()
}
// tlsHandler returns a gin middleware that runs every request through the
// unrolled/secure middleware configured to redirect to HTTPS on conf.Port.
// The handler chain is aborted when the security check fails or when the
// check has already written a redirect response.
func tlsHandler(conf *Config) gin.HandlerFunc {
	// Build the middleware once; its options do not change between requests.
	// NOTE(review): SSLHost carries only ":<port>" with no host name —
	// confirm this is the intended redirect target.
	secureMiddleware := secure.New(secure.Options{
		SSLRedirect: true,
		SSLHost:     ":" + strconv.Itoa(conf.Port),
	})

	return func(c *gin.Context) {
		// A plain return does not stop gin from running the remaining
		// handlers; the chain has to be aborted explicitly.
		if err := secureMiddleware.Process(c.Writer, c.Request); err != nil {
			c.Abort()
			return
		}
		// Do not run the route handler on top of a redirect response.
		if status := c.Writer.Status(); status > 300 && status < 399 {
			c.Abort()
			return
		}
		c.Next()
	}
}

38
gin/router.go Normal file
View File

@ -0,0 +1,38 @@
package gin
import (
"github.com/gin-gonic/gin"
"myschools.me/suguo/gofound/handler"
)
// routerSetup installs the shared middleware on router and registers every
// API route beneath the *rootpath prefix.
func routerSetup(router *gin.Engine, rootpath *string) {
	router.Use(gin.Recovery(), Cors(), Exception())

	api := router.Group(*rootpath)
	api.POST("query", handler.IndexQuery)
	api.GET("status", handler.Status)
	api.GET("gc", handler.GC)

	// Database management.
	db := api.Group("db")
	db.GET("list", handler.DatabaseShow)     // list databases
	db.GET("drop", handler.DatabaseDrop)     // drop a database
	db.GET("create", handler.DatabaseCreate) // create a database

	// Index maintenance.
	index := api.Group("index")
	index.POST("", handler.IndexAdd)            // add a single document
	index.POST("batch", handler.IndexBatchAdd)  // add documents in bulk
	index.POST("remove", handler.IndexRemove)   // remove a document

	// Tokenizer utilities.
	word := api.Group("word")
	word.GET("cut", handler.WordCut)
}

View File

@ -2,15 +2,12 @@ package global
// Config 服务器设置
type Config struct {
Addr string `yaml:"addr"` // 监听地址
Addr string
Port int
Data string `json:"data"` // 数据目录
Debug bool `yaml:"debug"` // 调试模式
Dictionary string `json:"dictionary"` // 字典路径
EnableAdmin bool `yaml:"enableAdmin"` //启用admin
Gomaxprocs int `json:"gomaxprocs"` //GOMAXPROCS
Shard int `yaml:"shard"` //分片数
Auth string `json:"auth"` //认证
EnableGzip bool `yaml:"enableGzip"` //是否开启gzip压缩
Timeout int64 `json:"timeout"` //超时时间
BufferNum int `yaml:"bufferNum"` //分片缓冲数
}

View File

@ -1,10 +1,45 @@
package global
import (
"myschools.me/suguo/gofound/searcher"
"os"
"runtime"
"strconv"
)
var (
CONFIG *Config // 服务器设置
Container *searcher.Container
)
// Parse populates CONFIG from environment variables.
//
// "addr" defaults to 127.0.0.1, "port" to 5678 (also used when the value is
// not a valid integer) and "data" to /data. "dictionary" is taken as is and
// may be empty. Gomaxprocs, Timeout and BufferNum are fixed at twice the CPU
// count, 600 seconds and 1000 respectively.
func Parse() {
	// envOr returns the named environment variable, or fallback when it is
	// unset or empty.
	envOr := func(name, fallback string) string {
		if v := os.Getenv(name); v != "" {
			return v
		}
		return fallback
	}

	listenPort, err := strconv.Atoi(envOr("port", "5678"))
	if err != nil {
		listenPort = 5678
	}

	CONFIG = &Config{
		Addr:       envOr("addr", "127.0.0.1"),
		Port:       listenPort,
		Data:       envOr("data", "/data"),   // data directory
		Dictionary: os.Getenv("dictionary"), // dictionary path
		Gomaxprocs: runtime.NumCPU() * 2,
		Timeout:    int64(10 * 60), // seconds before an idle database is closed
		BufferNum:  1000,           // shard buffer size
	}
}

26
go.mod
View File

@ -5,40 +5,42 @@ go 1.18
require (
github.com/Knetic/govaluate v3.0.0+incompatible
github.com/emirpasic/gods v1.12.0
github.com/gin-contrib/gzip v0.0.5
github.com/gin-gonic/gin v1.7.7
github.com/gin-contrib/gzip v0.0.6
github.com/gin-gonic/gin v1.8.1
github.com/go-ego/gse v0.70.2
github.com/ryszard/goskiplist v0.0.0-20150312221310-2dfbae5fcf46
github.com/shirou/gopsutil/v3 v3.22.4
github.com/syndtr/goleveldb v1.0.0
github.com/unrolled/secure v1.13.0
github.com/wangbin/jiebago v0.3.2
gopkg.in/yaml.v2 v2.4.0
)
require (
github.com/gin-contrib/sse v0.1.0 // indirect
github.com/go-ole/go-ole v1.2.6 // indirect
github.com/go-playground/locales v0.13.0 // indirect
github.com/go-playground/universal-translator v0.17.0 // indirect
github.com/go-playground/validator/v10 v10.4.1 // indirect
github.com/golang/protobuf v1.5.2 // indirect
github.com/go-playground/locales v0.14.0 // indirect
github.com/go-playground/universal-translator v0.18.0 // indirect
github.com/go-playground/validator/v10 v10.10.0 // indirect
github.com/goccy/go-json v0.9.7 // indirect
github.com/golang/snappy v0.0.3 // indirect
github.com/json-iterator/go v1.1.12 // indirect
github.com/kr/pretty v0.2.0 // indirect
github.com/leodido/go-urn v1.2.0 // indirect
github.com/leodido/go-urn v1.2.1 // indirect
github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0 // indirect
github.com/mattn/go-isatty v0.0.14 // indirect
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
github.com/modern-go/reflect2 v1.0.2 // indirect
github.com/pelletier/go-toml/v2 v2.0.1 // indirect
github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c // indirect
github.com/tklauser/go-sysconf v0.3.10 // indirect
github.com/tklauser/numcpus v0.4.0 // indirect
github.com/ugorji/go/codec v1.1.7 // indirect
github.com/ugorji/go/codec v1.2.7 // indirect
github.com/vcaesar/cedar v0.20.1 // indirect
github.com/yusufpapurcu/wmi v1.2.2 // indirect
golang.org/x/crypto v0.0.0-20220411220226-7b82a4e95df4 // indirect
golang.org/x/net v0.0.0-20220412020605-290c469a71a5 // indirect
golang.org/x/sys v0.0.0-20220412211240-33da011f77ad // indirect
golang.org/x/text v0.3.7 // indirect
golang.org/x/xerrors v0.0.0-20220411194840-2f41105eb62f // indirect
google.golang.org/protobuf v1.28.0 // indirect
gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15 // indirect
gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b // indirect
gopkg.in/yaml.v2 v2.4.0 // indirect
)

107
gofound.d
View File

@ -1,107 +0,0 @@
#!/bin/bash
# chkconfig: 2345 90 10
# Description: Startup script for gofound on Debian. Place in /etc/init.d and
# run 'update-rc.d -f gofound defaults', or use the appropriate command on your
# distro. For CentOS/Redhat run: 'chkconfig --add gofound'
### BEGIN INIT INFO
#
# Provides: gofound.d
# Required-Start: $local_fs $remote_fs
# Required-Stop: $local_fs $remote_fs
# Default-Start: 2 3 4 5
# Default-Stop: 0 1 6
# Short-Description: starts gofound
# Description: This file should be used to gofound scripts to be placed in /etc/init.d.
#
### END INIT INFO
## 2345是默认启动级别,级别有0-6共7个级别; 90是启动优先级,10是停止优先级,优先级范围是0-100,数字越大,优先级越低
## Fill in name of program here.
PROG="gofound"
PROG_PATH="/usr/local/bin" ## Not need, but sometimes helpful (if $PROG resides in /opt for example).
PROG_ARGS="--config=/gofound_path/config.yaml"
PID_PATH="/var/run/"
# start launches $PROG in the background, logging to /var/log/$PROG, and
# records its PID in $PID_PATH/$PROG.pid. It exits with status 1 when a PID
# file already exists.
start() {
    if [ -e "$PID_PATH/$PROG.pid" ]; then
        ## Program is running, exit with error.
        echo "Error! $PROG_PATH/$PROG is currently running!" 1>&2
        exit 1
    else
        ## Send stdout to the log first and then point stderr at it; with the
        ## redirections in the opposite order stderr stays on the terminal.
        $PROG_PATH/$PROG $PROG_ARGS >/var/log/$PROG 2>&1 &
        ## $! is the PID of the job just started. Scraping ps output can miss
        ## a process that is not listed yet or match unrelated processes.
        pid=$!
        echo "$PROG_PATH/$PROG($pid) started"
        echo $pid > "$PID_PATH/$PROG.pid"
    fi
}
# stop kills the running $PROG process(es) found via ps and removes the PID
# file. When no PID file exists it only prints an error.
stop() {
echo "begin stop"
if [ -e "$PID_PATH/$PROG.pid" ]; then
## Program is running, so stop it
#pid=`ps ax | grep -i '/usr/bin/frps' | grep -v 'grep' | sed 's/^\([0-9]\{1,\}\).*/\1/g' | head -n 1`
## The PID is looked up from the process list, not read from the PID file.
pid=`ps -ef | grep $PROG_PATH/$PROG | grep -v grep | awk '{print $2}'`
kill $pid
rm -f "$PID_PATH/$PROG.pid"
echo "$PROG_PATH/$PROG($pid) stopped"
else
## Program is not running, exit with error.
echo "Error! $PROG_PATH/$PROG not started!" 1>&2
fi
}
# status reports whether $PROG is running: running, dead with a stale PID
# file, or not started.
status() {
if [ -e "$PID_PATH/$PROG.pid" ]; then
## PID file exists; check whether a matching process is actually alive.
#pid=`ps ax | grep -i '/usr/bin/frps' | grep -v 'grep' | sed 's/^\([0-9]\{1,\}\).*/\1/g' | head -n 1`
pid=`ps -ef | grep $PROG_PATH/$PROG | grep -v grep | awk '{print $2}'`
if [ $pid ]; then
echo "$PROG_PATH/$PROG($pid) is running..."
else
echo "$PROG_PATH/$PROG dead but pid file exists" 1>&2
fi
else
## No PID file, so the program is reported as not started.
echo "Error! $PROG_PATH/$PROG not started!" 1>&2
fi
}
## Check to see if we are running as root first.
## Found at http://www.cyberciti.biz/tips/shell-root-user-check-script.html
if [ "$(id -u)" != "0" ]; then
echo "This script must be run as root" 1>&2
exit 1
fi
## Dispatch on the first argument; reload, restart and force-reload all run
## stop followed by start. Any other argument prints the usage and exits 1.
case "$1" in
start)
start
exit 0
;;
stop)
stop
exit 0
;;
reload|restart|force-reload)
stop
start
exit 0
;;
status)
status
exit 0
;;
*)
echo "Usage: $0 {start|stop|restart|status}" 1>&2
exit 1
;;
esac

18
handler/base-handler.go Normal file
View File

@ -0,0 +1,18 @@
package handler
import (
"github.com/gin-gonic/gin"
"myschools.me/suguo/gofound/service"
)
// GC calls service.ServerGC and then writes a success response.
func GC(c *gin.Context) {
service.ServerGC()
ResponseSuccess(c)
}
// Status writes the value returned by service.ServerStatus as the payload of
// a success response.
func Status(c *gin.Context) {
	ResponseSuccessWithData(c, service.ServerStatus())
}

View File

@ -1,6 +1,9 @@
package controller
package handler
import "github.com/gin-gonic/gin"
import (
"github.com/gin-gonic/gin"
"myschools.me/suguo/gofound/service"
)
// DatabaseDrop 删除数据库
func DatabaseDrop(c *gin.Context) {
@ -10,7 +13,7 @@ func DatabaseDrop(c *gin.Context) {
return
}
if err := srv.Database.Drop(dbName); err != nil {
if err := service.DatabaseDrop(dbName); err != nil {
ResponseErrorWithMsg(c, err.Error())
return
}
@ -26,11 +29,11 @@ func DatabaseCreate(c *gin.Context) {
return
}
srv.Database.Create(dbName)
service.DatabaseCreate(dbName)
ResponseSuccessWithData(c, "创建成功")
}
// DBS 查询数据库
func DBS(c *gin.Context) {
ResponseSuccessWithData(c, srv.Database.Show())
// 查询数据库
func DatabaseShow(c *gin.Context) {
ResponseSuccessWithData(c, service.DatabaseShow())
}

View File

@ -1,12 +1,30 @@
package controller
package handler
import (
"github.com/gin-gonic/gin"
"myschools.me/suguo/gofound/searcher/model"
"myschools.me/suguo/gofound/service"
)
// AddIndex 添加索引
func AddIndex(c *gin.Context) {
// IndexQuery handles a search request. The JSON body is bound to a
// model.SearchRequest, the target database is read from the "database" query
// parameter, and either the search result or the error message is written
// back as the response.
func IndexQuery(c *gin.Context) {
	request := &model.SearchRequest{}
	// Bind into the struct pointer itself. Passing &request (a pointer to
	// the pointer) lets a JSON body of `null` reset request to nil, which
	// would then be dereferenced below.
	if err := c.ShouldBindJSON(request); err != nil {
		ResponseErrorWithMsg(c, err.Error())
		return
	}
	request.Database = c.Query("database")

	// Run the search.
	r, err := service.IndexQuery(request)
	if err != nil {
		ResponseErrorWithMsg(c, err.Error())
		return
	}
	ResponseSuccessWithData(c, r)
}
// IndexAdd 添加索引
func IndexAdd(c *gin.Context) {
document := &model.IndexDoc{}
if err := c.ShouldBindJSON(&document); err != nil {
ResponseErrorWithMsg(c, err.Error())
@ -16,7 +34,7 @@ func AddIndex(c *gin.Context) {
if dbName == "" {
dbName = "default"
}
err := srv.Index.AddIndex(dbName, document)
err := service.IndexAdd(&dbName, document)
if err != nil {
ResponseErrorWithMsg(c, err.Error())
return
@ -25,8 +43,8 @@ func AddIndex(c *gin.Context) {
ResponseSuccessWithData(c, nil)
}
// BatchAddIndex 批量添加索引
func BatchAddIndex(c *gin.Context) {
// IndexBatchAdd 批量添加索引
func IndexBatchAdd(c *gin.Context) {
documents := make([]*model.IndexDoc, 0)
if err := c.BindJSON(&documents); err != nil {
ResponseErrorWithMsg(c, err.Error())
@ -38,7 +56,7 @@ func BatchAddIndex(c *gin.Context) {
dbName = "default"
}
err := srv.Index.BatchAddIndex(dbName, documents)
err := service.IndexBatchAdd(dbName, documents)
if err != nil {
ResponseErrorWithMsg(c, err.Error())
return
@ -47,8 +65,8 @@ func BatchAddIndex(c *gin.Context) {
ResponseSuccess(c)
}
// RemoveIndex 删除索引
func RemoveIndex(c *gin.Context) {
// IndexRemove 删除索引
func IndexRemove(c *gin.Context) {
removeIndexModel := &model.RemoveIndexModel{}
if err := c.BindJSON(&removeIndexModel); err != nil {
ResponseErrorWithMsg(c, err.Error())
@ -61,7 +79,7 @@ func RemoveIndex(c *gin.Context) {
return
}
if err := srv.Index.RemoveIndex(dbName, removeIndexModel); err != nil {
if err := service.IndexRemove(dbName, removeIndexModel); err != nil {
ResponseErrorWithMsg(c, err.Error())
return
}

View File

@ -1,4 +1,4 @@
package controller
package handler
import (
"net/http"

View File

@ -1,6 +1,9 @@
package controller
package handler
import "github.com/gin-gonic/gin"
import (
"github.com/gin-gonic/gin"
"myschools.me/suguo/gofound/service"
)
// WordCut 分词
func WordCut(c *gin.Context) {
@ -9,6 +12,6 @@ func WordCut(c *gin.Context) {
ResponseErrorWithMsg(c, "请输入关键字")
return
}
r := srv.Word.WordCut(q)
r := service.WordCut(q)
ResponseSuccessWithData(c, r)
}

39
main.go
View File

@ -1,10 +1,43 @@
package main
import (
"myschools.me/suguo/gofound/core"
"fmt"
"log"
"os"
"os/signal"
"syscall"
"myschools.me/suguo/gofound/gin"
"myschools.me/suguo/gofound/global"
"myschools.me/suguo/gofound/service"
)
func main() {
//初始化容器和参数解析
core.Initialize()
global.Parse()
defer func() {
if r := recover(); r != nil {
fmt.Printf("panic: %s\n", r)
}
}()
//初始化分词器
tokenizer := service.NewTokenizer(global.CONFIG.Dictionary)
service.ContainerInit(tokenizer)
gin.Service(&gin.Config{
RootPath: "/api",
Addr: global.CONFIG.Addr,
Port: global.CONFIG.Port,
Ssl: false,
SslPem: "",
SslKey: "",
EnableGzip: true,
})
// 优雅关机
quit := make(chan os.Signal, 1)
signal.Notify(quit, syscall.SIGINT, syscall.SIGTERM)
<-quit
log.Println("Shutdown Server ...")
}

View File

@ -7,6 +7,7 @@
#### 项目资料
https://github.com/sea-team/gofound
github.com/go-ego/gse
#### 实例环境构建

View File

@ -1,7 +0,0 @@
# GoFound SDK设计指南
## 支持自定义配置
在支持自定义配置的时候,同时提供默认配置项
##
支持`gofound` 提供的所有操作,增删改查等

View File

@ -1,34 +0,0 @@
package gofound
import (
"runtime"
"myschools.me/suguo/gofound/searcher/model"
"myschools.me/suguo/gofound/searcher/system"
)
// Query runs req against the database named by req.Database and returns the
// search result. On failure the result is nil and the error is returned.
func (c *Client) Query(req *model.SearchRequest) (*model.SearchResult, error) {
	db := c.container.GetDataBase(req.Database)
	result, err := db.MultiSearch(req)
	if err == nil {
		return result, nil
	}
	return nil, err
}
// GC runs a garbage collection via runtime.GC.
func (*Client) GC() {
runtime.GC()
}
// Status reports the memory, CPU and disk statistics gathered by the system
// package under the keys "memory", "cpu" and "disk". The returned error is
// always nil.
//
// The earlier runtime.ReadMemStats call was dropped: its result was never
// used.
func (c *Client) Status() (map[string]interface{}, error) {
	// TODO: include further system information.
	r := map[string]interface{}{
		"memory": system.GetMemStat(),
		"cpu":    system.GetCPUStatus(),
		"disk":   system.GetDiskStat(),
	}
	return r, nil
}

View File

@ -1,83 +0,0 @@
package gofound
import (
"fmt"
"os"
"runtime"
"sync"
"myschools.me/suguo/gofound/core"
"myschools.me/suguo/gofound/global"
"myschools.me/suguo/gofound/searcher"
)
var once sync.Once
// Client should hide implementation details from callers, exposing as small
// an interface as possible while remaining fully functional.
type Client struct {
config *global.Config // service configuration
container *searcher.Container // running container instance
}
// newDefaultConfig returns the configuration used by Default: listening on
// 127.0.0.1:5678, data under ./data, debug, admin and gzip enabled, and a
// 600-second timeout.
func newDefaultConfig() *global.Config {
	cfg := new(global.Config)
	cfg.Addr = "127.0.0.1:5678"
	// Use the platform path separator for the data directory.
	cfg.Data = fmt.Sprintf(".%sdata", string(os.PathSeparator))
	cfg.Debug = true
	cfg.Dictionary = "./data/dictionary.txt"
	cfg.EnableAdmin = true
	cfg.Gomaxprocs = runtime.NumCPU() * 2
	cfg.Shard = 0
	cfg.Auth = ""
	cfg.EnableGzip = true
	cfg.Timeout = 10 * 60
	return cfg
}
// newTokenizerAndContainer builds a tokenizer from config.Dictionary and
// returns a container that uses it.
//
// The dictionary path is taken from the config argument rather than from
// global.CONFIG, so the function honours the value it is given. Both existing
// callers assign global.CONFIG to the same config first, so their behavior is
// unchanged.
func newTokenizerAndContainer(config *global.Config) *searcher.Container {
	tokenizer := core.NewTokenizer(config.Dictionary)
	return core.NewContainer(tokenizer)
}
// NewClient creates a client from an explicit configuration; every option
// must be supplied by the caller. It also publishes the configuration and the
// new container through global.CONFIG and global.Container.
func NewClient(config *global.Config) *Client {
	global.CONFIG = config

	client := &Client{config: config}
	// Initialise the tokenizer and the container built on top of it.
	client.container = newTokenizerAndContainer(config)
	global.Container = client.container
	return client
}
// Default creates a client using the built-in default configuration. Like
// NewClient, it publishes the configuration and container through
// global.CONFIG and global.Container.
func Default() *Client {
	return NewClient(newDefaultConfig())
}
// SetAddr sets the web service address. An empty addr leaves the current
// value untouched. The client is returned so calls can be chained.
func (c *Client) SetAddr(addr string) *Client {
	if addr != "" {
		c.config.Addr = addr
	}
	return c
}
// SetData sets the directory where data is stored. An empty dir leaves the
// current value untouched. The client is returned so calls can be chained.
func (c *Client) SetData(dir string) *Client {
	if dir != "" {
		c.config.Data = dir
	}
	return c
}
//TODO 其他配置项

View File

@ -1,31 +0,0 @@
package gofound
import (
"github.com/syndtr/goleveldb/leveldb/errors"
"myschools.me/suguo/gofound/searcher"
)
// Show returns the databases held by the client's container.
// The error result is always nil.
func (c *Client) Show() (map[string]*searcher.Engine, error) {
// The error return exists only to keep the method style consistent.
return c.container.GetDataBases(), nil
}
// Drop deletes the database called dbName. An empty name is rejected with an
// error; otherwise the container's DropDataBase result is returned as is.
func (c *Client) Drop(dbName string) error {
	if len(dbName) == 0 {
		return errors.New("database not exist")
	}
	return c.container.DropDataBase(dbName)
}
// Create returns the engine for dbName as obtained from the container's
// GetDataBase. An empty name is rejected with an error.
func (c *Client) Create(dbName string) (*searcher.Engine, error) {
	if len(dbName) == 0 {
		return nil, errors.New("database name is empty")
	}
	engine := c.container.GetDataBase(dbName)
	return engine, nil
}

View File

@ -1,44 +0,0 @@
package gofound
import (
"errors"
"myschools.me/suguo/gofound/searcher/model"
)
// AddIndex indexes a single document into the database dbName.
//
// It returns an error when request is nil or has an empty Text, and otherwise
// returns whatever IndexDocument reports. Previously the IndexDocument error
// was discarded and nil was always returned, so failed writes looked like
// successes.
func (c *Client) AddIndex(dbName string, request *model.IndexDoc) error {
	if request == nil || request.Text == "" {
		return errors.New("text is empty")
	}
	return c.container.GetDataBase(dbName).IndexDocument(request)
}
// BatchAddIndex indexes documents into the database dbName.
//
// All documents are validated first; a nil entry, an empty Text or a nil
// Document rejects the whole batch before anything is written. Documents are
// then indexed one after another and the first IndexDocument error is
// returned.
//
// Earlier versions started one untracked goroutine per document and returned
// immediately, which dropped every indexing error and gave the caller no way
// to know when (or whether) the batch had been written.
func (c *Client) BatchAddIndex(dbName string, documents []*model.IndexDoc) error {
	db := c.container.GetDataBase(dbName)

	// Pre-validate the batch.
	for _, doc := range documents {
		if doc == nil {
			return errors.New("document is empty")
		}
		if doc.Text == "" {
			return errors.New("text is empty")
		}
		if doc.Document == nil {
			return errors.New("document is empty")
		}
	}

	for _, doc := range documents {
		if err := db.IndexDocument(doc); err != nil {
			return err
		}
	}
	return nil
}
// RemoveIndex removes the entry identified by data.Id from the database
// dbName and returns the engine's RemoveIndex result.
func (c *Client) RemoveIndex(dbName string, data *model.RemoveIndexModel) error {
	engine := c.container.GetDataBase(dbName)
	return engine.RemoveIndex(data.Id)
}

View File

@ -1,15 +0,0 @@
package gofound
// WordCut segments keyword with the container's tokenizer and returns the
// resulting words.
func (c *Client) WordCut(keyword string) []string {
return c.container.Tokenizer.Cut(keyword)
}
// BatchWordCut segments every keyword with the container's tokenizer.
//
// The returned slice has exactly one entry per keyword, in input order. The
// slice is created with zero length and len(keywords) capacity; creating it
// with len(keywords) length and then appending (as before) produced
// len(keywords) leading nil entries ahead of the real results.
func (c *Client) BatchWordCut(keywords []string) *[][]string {
	res := make([][]string, 0, len(keywords))
	for _, w := range keywords {
		res = append(res, c.container.Tokenizer.Cut(w))
	}
	return &res
}

View File

@ -1,27 +0,0 @@
package searcher
import (
"fmt"
"testing"
)
// TestContainer_Init initialises a container in a throwaway directory, opens
// the "test" database and prints what the container reports.
//
// The data directory comes from t.TempDir so the test no longer depends on a
// path that exists only on one developer's machine, and an Init failure is
// reported through t.Fatalf instead of a panic.
func TestContainer_Init(t *testing.T) {
	c := &Container{
		Dir:   t.TempDir(),
		Debug: true,
	}
	if err := c.Init(); err != nil {
		t.Fatalf("init container: %v", err)
	}

	test := c.GetDataBase("test")
	fmt.Println(test.GetIndexCount())

	for name, engine := range c.GetDataBases() {
		fmt.Println(name)
		fmt.Println(engine)
	}
}

View File

@ -1,35 +1,42 @@
package words
import (
"embed"
"strings"
"github.com/wangbin/jiebago"
"myschools.me/suguo/gofound/searcher/utils"
"github.com/go-ego/gse"
)
var (
//go:embed data/*.txt
dictionaryFS embed.FS
)
// var (
// //go:embed data/*.txt
// dictionaryFS embed.FS
// )
type Tokenizer struct {
seg jiebago.Segmenter
seg gse.Segmenter
}
func NewTokenizer(dictionaryPath string) *Tokenizer {
file, err := dictionaryFS.Open("data/dictionary.txt")
// file, err := dictionaryFS.Open("data/dictionary.txt")
// if err != nil {
// panic(err)
// }
// utils.ReleaseAssets(file, dictionaryPath)
tokenizer := &Tokenizer{}
s, err := gse.New("zh", "alpha")
if err != nil {
panic(err)
}
utils.ReleaseAssets(file, dictionaryPath)
s.LoadDict()
// s.LoadDict("zh_s")
tokenizer.seg = s
tokenizer := &Tokenizer{}
err = tokenizer.seg.LoadDictionary(dictionaryPath)
if dictionaryPath != "" {
err = tokenizer.seg.LoadDict(dictionaryPath)
if err != nil {
panic(err)
}
}
return tokenizer
}
@ -37,20 +44,14 @@ func NewTokenizer(dictionaryPath string) *Tokenizer {
func (t *Tokenizer) Cut(text string) []string {
//不区分大小写
text = strings.ToLower(text)
//移除所有的标点符号
text = utils.RemovePunctuation(text)
//移除所有的空格
text = utils.RemoveSpace(text)
var wordMap = make(map[string]struct{})
resultChan := t.seg.CutForSearch(text, true)
resultChan := t.seg.Cut(text, true)
resultChan = t.seg.Trim(resultChan)
var wordsSlice []string
for {
w, ok := <-resultChan
if !ok {
break
}
for _, w := range resultChan {
_, found := wordMap[w]
if !found {
//去除重复的词

View File

@ -0,0 +1,27 @@
package service
import (
"myschools.me/suguo/gofound/global"
"myschools.me/suguo/gofound/searcher"
"myschools.me/suguo/gofound/searcher/words"
)
var container *searcher.Container
// ContainerInit builds the package-level container from global.CONFIG and the
// given tokenizer, then initialises it. It panics if Init returns an error.
func ContainerInit(tokenizer *words.Tokenizer) {
	cfg := global.CONFIG
	container = &searcher.Container{
		Tokenizer: tokenizer,
		Dir:       cfg.Data,
		Shard:     cfg.Shard,
		Timeout:   cfg.Timeout,
		BufferNum: cfg.BufferNum,
	}

	err := container.Init()
	if err != nil {
		panic(err)
	}
}
// NewTokenizer returns words.NewTokenizer(dictionaryPath); it exists so
// callers can build a tokenizer through the service package.
func NewTokenizer(dictionaryPath string) *words.Tokenizer {
return words.NewTokenizer(dictionaryPath)
}

View File

@ -0,0 +1,23 @@
package service
import (
"myschools.me/suguo/gofound/searcher"
)
// DatabaseShow returns the databases held by the package-level container.
func DatabaseShow() map[string]*searcher.Engine {
return container.GetDataBases()
}
// DatabaseDrop deletes the database dbName and returns the container's
// DropDataBase result.
func DatabaseDrop(dbName string) error {
	return container.DropDataBase(dbName)
}
// DatabaseCreate returns the engine for dbName as obtained from the
// container's GetDataBase.
func DatabaseCreate(dbName string) *searcher.Engine {
return container.GetDataBase(dbName)
}

34
service/index-service.go Normal file
View File

@ -0,0 +1,34 @@
package service
import (
"myschools.me/suguo/gofound/searcher/model"
)
// IndexAdd indexes req into the database named by *dbName and returns the
// IndexDocument result.
// NOTE(review): dbName is dereferenced without a nil check, so passing a nil
// pointer panics.
func IndexAdd(dbName *string, req *model.IndexDoc) error {
return container.GetDataBase(*dbName).IndexDocument(req)
}
// IndexQuery runs request against the database named by request.Database and
// returns the MultiSearch result.
func IndexQuery(request *model.SearchRequest) (*model.SearchResult, error) {
return container.GetDataBase(request.Database).MultiSearch(request)
}
// IndexBatchAdd indexes documents into the database dbName one at a time, in
// order, stopping at and returning the first IndexDocument error.
func IndexBatchAdd(dbName string, documents []*model.IndexDoc) error {
	engine := container.GetDataBase(dbName)
	for i := range documents {
		err := engine.IndexDocument(documents[i])
		if err != nil {
			return err
		}
	}
	return nil
}
// IndexRemove removes the entry identified by data.Id from the database
// dbName and returns the engine's RemoveIndex result.
func IndexRemove(dbName string, data *model.RemoveIndexModel) error {
	engine := container.GetDataBase(dbName)
	return engine.RemoveIndex(data.Id)
}

View File

@ -9,7 +9,26 @@ import (
"myschools.me/suguo/gofound/searcher/utils"
)
func Callback() map[string]interface{} {
// ServerGC triggers a garbage collection by calling runtime.GC.
func ServerGC() {
runtime.GC()
}
// 获取服务器状态
func ServerStatus() map[string]interface{} {
var m runtime.MemStats
runtime.ReadMemStats(&m)
r := map[string]interface{}{
"memory": system.GetMemStat(),
"cpu": system.GetCPUStatus(),
"disk": system.GetDiskStat(),
"system": serverInfo(),
}
return r
}
func serverInfo() map[string]interface{} {
return map[string]interface{}{
"os": runtime.GOOS,
"arch": runtime.GOARCH,
@ -19,16 +38,13 @@ func Callback() map[string]interface{} {
"dataPath": global.CONFIG.Data,
"dictionaryPath": global.CONFIG.Dictionary,
"gomaxprocs": runtime.NumCPU() * 2,
"debug": global.CONFIG.Debug,
"shard": global.CONFIG.Shard,
"dataSize": system.GetFloat64MB(utils.DirSizeB(global.CONFIG.Data)),
"executable": os.Args[0],
"dbs": global.Container.GetDataBaseNumber(),
"dbs": container.GetDataBaseNumber(),
//"indexCount": global.container.GetIndexCount(),
//"documentCount": global.container.GetDocumentCount(),
"pid": os.Getpid(),
"enableAuth": global.CONFIG.Auth != "",
"enableGzip": global.CONFIG.EnableGzip,
"bufferNum": global.CONFIG.BufferNum,
}
}

34
service/word-service.go Normal file
View File

@ -0,0 +1,34 @@
package service
import (
"fmt"
"github.com/go-ego/gse"
)
// WordCut segments keyword with the package-level container's tokenizer and
// returns the resulting words.
func WordCut(keyword string) []string {
return container.Tokenizer.Cut(keyword)
}
var (
seg gse.Segmenter
)
// init creates the package-level gse segmenter used by Cut.
//
// It panics if the segmenter cannot be created. A failure to load the extra
// "zh_s" dictionary is reported on stdout instead of being silently ignored;
// it is not treated as fatal so that start-up behavior stays the same as
// before when that dictionary is unavailable.
func init() {
	s, err := gse.New("zh", "alpha")
	if err != nil {
		panic(err)
	}
	// s.LoadDict()
	if err := s.LoadDict("zh_s"); err != nil {
		fmt.Printf("gse: load dictionary %q: %v\n", "zh_s", err)
	}
	seg = s
}
// Cut segments w with the package-level segmenter and returns the trimmed
// tokens. As a side effect it prints the trimmed part-of-speech tagging of w
// to stdout.
func Cut(w string) []string {
	tokens := seg.Cut(w, false)
	tagged := seg.Pos(w, true)
	fmt.Println(seg.TrimPos(tagged))
	return seg.Trim(tokens)
}

11
service/word_test.go Normal file
View File

@ -0,0 +1,11 @@
package service
import (
"fmt"
"testing"
)
// TestCut segments a sample sentence and fails if no tokens come back; the
// tokens are also printed for manual inspection. Previously the test only
// printed the result and could never fail.
func TestCut(t *testing.T) {
	a := Cut("而这些并不是完全重要,更加重要的问题是, 海贝尔曾经提到过,人生就是学校。在那里,与其说好的教师是幸福,不如说好的教师是不幸。")
	if len(a) == 0 {
		t.Fatal("Cut returned no tokens for a non-empty sentence")
	}
	fmt.Println(a)
}

View File

@ -1,39 +0,0 @@
package controller
import (
"github.com/gin-gonic/gin"
"myschools.me/suguo/gofound/searcher/model"
)
// Welcome responds with a fixed greeting.
func Welcome(c *gin.Context) {
ResponseSuccessWithData(c, "Welcome to GoFound")
}
// Query handles a search request. The JSON body is bound to a SearchRequest,
// the target database is taken from the "database" query parameter, and the
// search result (or the error message) is written to the response.
func Query(c *gin.Context) {
	request := &model.SearchRequest{}
	if bindErr := c.ShouldBindJSON(&request); bindErr != nil {
		ResponseErrorWithMsg(c, bindErr.Error())
		return
	}
	request.Database = c.Query("database")

	// Run the search.
	result, err := srv.Base.Query(request)
	if err != nil {
		ResponseErrorWithMsg(c, err.Error())
		return
	}
	ResponseSuccessWithData(c, result)
}
// GC asks the base service to run a garbage collection and responds with
// success.
func GC(c *gin.Context) {
srv.Base.GC()
ResponseSuccess(c)
}
// Status responds with the server status reported by the base service.
func Status(c *gin.Context) {
r := srv.Base.Status()
ResponseSuccessWithData(c, r)
}

View File

@ -1,23 +0,0 @@
package controller
import (
service2 "myschools.me/suguo/gofound/web/service"
)
var srv *Services
// Services groups the service objects used by the controllers.
type Services struct {
Base *service2.Base
Index *service2.Index
Database *service2.Database
Word *service2.Word
}
// NewServices creates one instance of every service and stores the set in the
// package-level srv variable used by the handlers.
func NewServices() {
srv = &Services{
Base: service2.NewBase(),
Index: service2.NewIndex(),
Database: service2.NewDatabase(),
Word: service2.NewWord(),
}
}

View File

@ -1,19 +0,0 @@
package router
import (
"github.com/gin-gonic/gin"
"myschools.me/suguo/gofound/web/controller"
)
// InitBaseRouter registers the basic management routes (welcome, query,
// status, gc) directly on the given group.
func InitBaseRouter(Router *gin.RouterGroup) {
BaseRouter := Router.Group("")
{
BaseRouter.GET("/", controller.Welcome)
BaseRouter.POST("query", controller.Query)
BaseRouter.GET("status", controller.Status)
BaseRouter.GET("gc", controller.GC)
}
}

View File

@ -1,17 +0,0 @@
package router
import (
"github.com/gin-gonic/gin"
"myschools.me/suguo/gofound/web/controller"
)
// InitDatabaseRouter registers the database routes under the "db" sub-group.
func InitDatabaseRouter(Router *gin.RouterGroup) {
databaseRouter := Router.Group("db")
{
databaseRouter.GET("list", controller.DBS) // list databases
databaseRouter.GET("drop", controller.DatabaseDrop) // drop a database
databaseRouter.GET("create", controller.DatabaseCreate) // create a database
}
}

View File

@ -1,17 +0,0 @@
package router
import (
"github.com/gin-gonic/gin"
"myschools.me/suguo/gofound/web/controller"
)
// InitIndexRouter registers the index routes under the "index" sub-group.
func InitIndexRouter(Router *gin.RouterGroup) {
indexRouter := Router.Group("index")
{
indexRouter.POST("", controller.AddIndex) // add a single index entry
indexRouter.POST("batch", controller.BatchAddIndex) // add index entries in batch
indexRouter.POST("remove", controller.RemoveIndex) // remove an index entry
}
}

View File

@ -1,52 +0,0 @@
package router
import (
"log"
"strings"
"myschools.me/suguo/gofound/global"
"myschools.me/suguo/gofound/web/controller"
"myschools.me/suguo/gofound/web/middleware"
"github.com/gin-contrib/gzip"
"github.com/gin-gonic/gin"
)
// SetupRouter builds the gin engine for the HTTP API.
//
// It selects debug or release mode from global.CONFIG.Debug, optionally
// enables gzip, installs the CORS and exception middleware, and mounts the
// base, index, database and word routes under "/api".
//
// Basic auth is installed on the "/api" group only when global.CONFIG.Auth is
// non-empty. The value must have the form "user:password"; it is split at the
// first colon so the password itself may contain colons. A non-empty value
// without a colon is a configuration error and panics with an explicit
// message. Previously an empty or colon-less value crashed with an index out
// of range, and a password containing ":" was silently truncated.
//
// NOTE(review): the root "POST /" query route is registered outside the
// "/api" group and therefore does not pass through basic auth — confirm this
// is intended.
func SetupRouter() *gin.Engine {
	if global.CONFIG.Debug {
		gin.SetMode(gin.DebugMode)
	} else {
		gin.SetMode(gin.ReleaseMode)
	}

	router := gin.Default()
	// Enable gzip compression.
	if global.CONFIG.EnableGzip {
		router.Use(gzip.Gzip(gzip.DefaultCompression))
	}

	var handlers []gin.HandlerFunc
	authEnabled := global.CONFIG.Auth != ""
	if authEnabled {
		auths := strings.SplitN(global.CONFIG.Auth, ":", 2)
		if len(auths) != 2 {
			// Fail closed: never start an API that was meant to be protected
			// without its credentials.
			log.Panicf("invalid auth configuration: expected \"user:password\"")
		}
		handlers = append(handlers, gin.BasicAuth(
			gin.Accounts{
				auths[0]: auths[1],
			},
		))
	}
	// Log only whether auth is on; the credentials themselves must not end up
	// in the log.
	log.Println("Enable Auth:", authEnabled)

	// Route groups and middleware.
	router.Use(middleware.Cors(), middleware.Exception())
	router.POST("/", controller.Query)
	group := router.Group("/api", handlers...)
	{
		InitBaseRouter(group)     // basic management
		InitIndexRouter(group)    // index management
		InitDatabaseRouter(group) // database management
		InitWordRouter(group)     // word segmentation
	}
	log.Printf("API Url: \t http://%v/api", global.CONFIG.Addr)
	return router
}

View File

@ -1,15 +0,0 @@
package router
import (
"github.com/gin-gonic/gin"
"myschools.me/suguo/gofound/web/controller"
)
// InitWordRouter registers the word segmentation route under the "word"
// sub-group.
func InitWordRouter(Router *gin.RouterGroup) {
wordRouter := Router.Group("word")
{
wordRouter.GET("cut", controller.WordCut)
}
}

View File

@ -1,56 +0,0 @@
package service
import (
"os"
"runtime"
"myschools.me/suguo/gofound/global"
"myschools.me/suguo/gofound/searcher"
"myschools.me/suguo/gofound/searcher/model"
"myschools.me/suguo/gofound/searcher/system"
)
// Base is the basic management service. Callback supplies the "system"
// section of the status report.
type Base struct {
Container *searcher.Container
Callback func() map[string]interface{}
}
// NewBase returns a Base wired to global.Container and the package's Callback
// function.
func NewBase() *Base {
return &Base{
Container: global.Container,
Callback: Callback,
}
}
// Query runs request against the database named by request.Database and
// returns the MultiSearch result.
func (b *Base) Query(request *model.SearchRequest) (*model.SearchResult, error) {
return b.Container.GetDataBase(request.Database).MultiSearch(request)
}
// GC triggers a garbage collection by calling runtime.GC.
func (b *Base) GC() {
runtime.GC()
}
// Status returns a snapshot of server statistics keyed by "memory", "cpu",
// "disk" and "system"; the "system" entry is whatever b.Callback returns.
//
// The previous implementation also called runtime.ReadMemStats and threw the
// result away; that call has been removed because nothing consumed it.
func (b *Base) Status() map[string]interface{} {
	s := b.Callback()
	r := map[string]interface{}{
		"memory": system.GetMemStat(),
		"cpu":    system.GetCPUStatus(),
		"disk":   system.GetDiskStat(),
		"system": s,
	}
	return r
}
// Restart is intended to restart the service.
func (b *Base) Restart() {
// TODO: not implemented; for now this only exits the process with status 0.
os.Exit(0)
}

View File

@ -1,34 +0,0 @@
package service
import (
"myschools.me/suguo/gofound/global"
"myschools.me/suguo/gofound/searcher"
)
// Database is the database management service; it operates on Container.
type Database struct {
Container *searcher.Container
}
// NewDatabase returns a Database wired to global.Container.
func NewDatabase() *Database {
return &Database{
Container: global.Container,
}
}
// Show returns the databases held by the container.
func (d *Database) Show() map[string]*searcher.Engine {
return d.Container.GetDataBases()
}
// Drop deletes the database dbName and returns the container's DropDataBase
// result.
func (d *Database) Drop(dbName string) error {
	return d.Container.DropDataBase(dbName)
}
// Create returns the engine for dbName as obtained from the container's
// GetDataBase.
func (d *Database) Create(dbName string) *searcher.Engine {
return d.Container.GetDataBase(dbName)
}

View File

@ -1,42 +0,0 @@
package service
import (
"myschools.me/suguo/gofound/global"
"myschools.me/suguo/gofound/searcher"
"myschools.me/suguo/gofound/searcher/model"
)
// Index is the index management service; it operates on Container.
type Index struct {
Container *searcher.Container
}
// NewIndex returns an Index wired to global.Container.
func NewIndex() *Index {
return &Index{
Container: global.Container,
}
}
// AddIndex indexes request into the database dbName and returns the
// IndexDocument result.
func (i *Index) AddIndex(dbName string, request *model.IndexDoc) error {
return i.Container.GetDataBase(dbName).IndexDocument(request)
}
// BatchAddIndex indexes documents into the database dbName one at a time, in
// order, stopping at and returning the first IndexDocument error.
func (i *Index) BatchAddIndex(dbName string, documents []*model.IndexDoc) error {
	engine := i.Container.GetDataBase(dbName)
	for n := range documents {
		err := engine.IndexDocument(documents[n])
		if err != nil {
			return err
		}
	}
	return nil
}
// RemoveIndex removes the entry identified by data.Id from the database
// dbName and returns the engine's RemoveIndex result.
func (i *Index) RemoveIndex(dbName string, data *model.RemoveIndexModel) error {
	engine := i.Container.GetDataBase(dbName)
	return engine.RemoveIndex(data.Id)
}

View File

@ -1,21 +0,0 @@
package service
import (
"myschools.me/suguo/gofound/global"
"myschools.me/suguo/gofound/searcher"
)
// Word is the word segmentation service; it uses the tokenizer held by
// Container.
type Word struct {
Container *searcher.Container
}
// NewWord returns a Word wired to global.Container.
func NewWord() *Word {
return &Word{
Container: global.Container,
}
}
// WordCut segments keyword with the container's tokenizer and returns the
// resulting words.
func (w *Word) WordCut(keyword string) []string {
return w.Container.Tokenizer.Cut(keyword)
}