提交 fa4dc110 authored 作者: 赵雪如's avatar 赵雪如

Initial commit

上级
流水线 #299 已取消 于阶段
*.exe
*.exe~
*.dll
*.so
*.dylib
*.test
*.out
.idea
.vscode
*.suo
*.ntvs*
*.njsproj
*.sln
*.sw?
*.iml
\ No newline at end of file
package addr
import (
"gitlab.jxhh.com/zhaoxueru/address.git/areaMap"
"regexp"
"strconv"
"strings"
)
// Address is the result record filled in by Decompose and Parse for one
// free-form shipping-address string.
type Address struct {
	// Contact details extracted from the raw text by Decompose.
	IdNumber string `json:"id_number"`
	Mobile   string `json:"mobile"`
	PostCode string `json:"post_code"`
	Name     string `json:"name"`
	// Address is the text that remains once the fields above are removed.
	Address string `json:"address"`

	// Administrative divisions resolved by Parse; each level carries the
	// display name and the numeric id used by the areaMap tables.
	Province     string `json:"province"`
	ProvinceCode int    `json:"province_code"`
	City         string `json:"city"`
	CityCode     int    `json:"city_code"`
	Region       string `json:"region"`
	RegionCode   int    `json:"region_code"`

	// Street level; not assigned anywhere in the visible code.
	Street     string `json:"street"`
	StreetCode int    `json:"street_code"`
}
// FilterWord lists the label words and punctuation that Decompose replaces
// with a space before it looks for numbers, so that they act as separators
// instead of polluting the name or address.
//
// Decompose applies the entries in slice order, so a longer word must come
// before any shorter word it contains ("手机号码" before "手机"); otherwise
// the shorter word is replaced first and the tail ("号码") is left behind.
//
// Punctuation is listed in both full-width (written as \u escapes) and ASCII
// form, because Chinese input commonly uses the full-width characters.
var FilterWord = []string{
	"身份证号码", "身份证号", "身份证",
	"手机号码", "手机号", "手机",
	"收货人", "收件人", "收货",
	"地址", "邮编", "电话",
	"\uff1a", ":", "\uff1b", ";", "\uff0c", ",", "。", ".",
}
// Patterns used by Decompose, compiled once instead of on every call.
var (
	// Any run of whitespace (spaces, tabs, line breaks).
	decomposeSpaceRe = regexp.MustCompile(`\s+`)
	// A mobile number written with dashes, optionally prefixed with 0,
	// e.g. 0136-3333-6666.
	decomposeDashedMobileRe = regexp.MustCompile(`0?(\d{3})-(\d{4})-(\d{4})`)
	// An 18-character ID-card number: 18 digits, or 17 digits plus X/x.
	decomposeIDNumberRe = regexp.MustCompile(`(?i)\d{18}|\d{17}X`)
	// A 7-11 digit number, or a landline written as area code, dash, number.
	decomposePhoneRe = regexp.MustCompile(`\d{7,11}|\d{3,4}-\d{6,8}`)
	// A 6-digit postal code.
	decomposePostCodeRe = regexp.MustCompile(`\d{6}`)
	// Punctuation and special symbols that are deleted from the text.
	decomposeSymbolRe = regexp.MustCompile(`[~!@#$%^&*()_\-+=<>?:"{}|,.\/;'\\[\]·~!@#¥%……&*()—\-+={}|《》?:“”【】、;‘',。、]`)
)

// Decompose splits a free-form shipping string into ID-card number, phone
// number, postal code, recipient name and address, stores them in info and
// returns info.
//
// The split is heuristic: after the numbers are removed, the remaining text
// is cut at spaces and the shortest piece (by byte length, first one on a
// tie) is taken as the name. When only one piece remains it becomes the
// address and the name is left untouched.
//
// info must not be nil.
func Decompose(info *Address, str string) *Address {
	// 1. Turn label words and separators into spaces.
	for _, word := range FilterWord {
		str = strings.Replace(str, word, " ", -1)
	}

	// 2. Collapse every whitespace run into a single space.
	str = strings.TrimSpace(decomposeSpaceRe.ReplaceAllString(str, " "))

	// 3. Join dashed mobile numbers (0136-3333-6666 -> 13633336666). This has
	// to run while the dashes are still present, i.e. before step 7.
	str = decomposeDashedMobileRe.ReplaceAllString(str, "$1$2$3")

	// 4. ID-card number.
	idNumber := decomposeIDNumberRe.FindString(str)
	if idNumber != "" {
		str = strings.Replace(str, idNumber, "", -1)
	}
	info.IdNumber = strings.ToUpper(idNumber)

	// 5. Mobile or landline number. The landline form needs its dash, which
	// is another reason symbol stripping is deferred to step 7.
	mobile := decomposePhoneRe.FindString(str)
	if mobile != "" {
		str = strings.Replace(str, mobile, "", -1)
	}
	info.Mobile = mobile

	// 6. Postal code. Parse may later overwrite it with the code stored for
	// the matched district.
	postcode := decomposePostCodeRe.FindString(str)
	if postcode != "" {
		str = strings.Replace(str, postcode, "", -1)
	}
	info.PostCode = postcode

	// 7. Delete the remaining punctuation and special symbols.
	str = decomposeSymbolRe.ReplaceAllString(str, "")

	// 8. Removing text can leave doubled or leading/trailing spaces.
	str = strings.TrimSpace(decomposeSpaceRe.ReplaceAllString(str, " "))

	// 9. Split at spaces; the shortest piece is assumed to be the name.
	parts := strings.Split(str, " ")
	if len(parts) <= 1 {
		info.Address = parts[0]
		return info
	}
	nameIdx := 0
	for i, part := range parts {
		if len(part) < len(parts[nameIdx]) {
			nameIdx = i
		}
	}
	info.Name = parts[nameIdx]

	// Rebuild the address from the other pieces. Deleting the name with a
	// global string replace would also remove it from inside the address.
	rest := make([]string, 0, len(parts)-1)
	rest = append(rest, parts[:nameIdx]...)
	rest = append(rest, parts[nameIdx+1:]...)
	info.Address = strings.Join(rest, " ")
	return info
}
// Smart is the one-call entry point: it runs Decompose on str to separate
// the contact fields from the address text, then Parse to resolve the
// administrative divisions, and returns the populated record.
func Smart(str string) *Address {
	parsed := *Decompose(new(Address), str)
	Parse(&parsed)
	return &parsed
}
// Patterns used by Parse, compiled once instead of on every call. Each one
// cuts the address into consecutive fragments ending in a division suffix.
var (
	parseProvinceRe = regexp.MustCompile(`.+?(省|市|自治区|特别行政区|区)`)
	parseCityRe     = regexp.MustCompile(`.+?(省|市|自治州|州|地区|盟|县|自治县|区|林区)`)
	parseRegionRe   = regexp.MustCompile(`.+?(市|县|自治县|旗|自治旗|区|林区|特区|街道|镇|乡)`)
)

// Parse resolves province, city and district (region) from address.Address
// using the areaMap lookup tables, writes the result into address and
// returns it. address.PostCode is overwritten with the zipcode stored for
// the matched district or city. Street-level matching is not implemented.
//
// address must not be nil.
func Parse(address *Address) *Address {
	// Candidate fragments per level. A pattern can miss fragments that a
	// higher-level pattern catches (the original notes 东乡区 as an example),
	// so each list is extended with the candidates of the level above.
	pArr := parseProvinceRe.FindAllString(address.Address, -1)
	cArr := append(parseCityRe.FindAllString(address.Address, -1), pArr...)
	rArr := append(parseRegionRe.FindAllString(address.Address, -1), cArr...)

	// District level: an exact name match also fixes city and province
	// through the parent ids.
regionSearch:
	for _, fragment := range rArr {
		candidates, ok := areaMap.RegionByName[fragment]
		if !ok {
			continue
		}
		if len(candidates) == 1 {
			only := candidates[0]
			address.Region = only.Name
			address.RegionCode = only.Id
			address.PostCode = strconv.Itoa(only.Zipcode)
			getAddressById(address, only.Pid, city)
			break
		}
		// Several districts share this name: try each one and stop at the
		// first whose parent city also appears among the city candidates.
		// If none does, the last candidate tried stays in place and the
		// search moves on to the next fragment.
		for _, cand := range candidates {
			address.Region = cand.Name
			address.RegionCode = cand.Id
			// Use the zipcode of the candidate being tried, not always the
			// first entry of the list.
			address.PostCode = strconv.Itoa(cand.Zipcode)
			getAddressById(address, cand.Pid, city)
			for _, c := range cArr {
				if address.City == c {
					break regionSearch
				}
			}
		}
	}

	// City level, only when the district step did not set a city.
	if address.City == "" {
		for _, c := range cArr {
			if r1, ok := areaMap.CityByName[c]; ok {
				address.City = r1[0].Name
				address.CityCode = r1[0].Id
				address.PostCode = strconv.Itoa(r1[0].Zipcode)
				getAddressById(address, r1[0].Pid, province)
				getAddressByPid(address, r1[0].Id, region, rArr)
				break
			}
		}
	}

	// Province level, only when nothing above set a province.
	if address.Province == "" {
		for _, p := range pArr {
			if r1, ok := areaMap.ProvinceByName[p]; ok {
				address.Province = r1[0].Name
				address.ProvinceCode = r1[0].Id
				getAddressByPid(address, r1[0].Id, city, cArr)
				getAddressByPid(address, r1[0].Id, region, rArr)
				break
			}
		}
	}
	return address
}
// Rank names passed as the rank argument of getAddressById and
// getAddressByPid to select the administrative level being handled.
const (
	province = "province" // top level
	city     = "city"     // child of a province
	region   = "region"   // child of a city
	street   = "street"   // declared but not referenced in the visible code
)
// getAddressById fills address for the division with the given id at the
// given rank and then walks up through the parent ids, so a region also sets
// its city and province and a city also sets its province. An id missing
// from the table yields an empty name while the code is still set to id.
// Any other rank leaves address unchanged. Returns address.
func getAddressById(address *Address, id int, rank string) *Address {
	switch rank {
	case province:
		address.Province = areaMap.ProvinceById[id].Name
		address.ProvinceCode = id
	case city:
		entry := areaMap.CityById[id]
		address.City, address.CityCode = entry.Name, id
		getAddressById(address, entry.Pid, province)
	case region:
		entry := areaMap.RegionById[id]
		address.Region, address.RegionCode = entry.Name, id
		getAddressById(address, entry.Pid, city)
	}
	return address
}
// getAddressByPid looks among the children of pid at the given rank (city or
// region) for the first one whose name contains one of the fragments in arr,
// trying the fragments in order, and stores its name, id and zipcode in
// address. It does nothing when that level is already set on address or when
// rank is any other value. Returns address.
func getAddressByPid(address *Address, pid int, rank string, arr []string) *Address {
	switch {
	case rank == city && address.City == "":
		children := areaMap.CityByPid[pid]
		for _, fragment := range arr {
			for i := range children {
				if !strings.Contains(children[i].Name, fragment) {
					continue
				}
				address.City = children[i].Name
				address.CityCode = children[i].Id
				address.PostCode = strconv.Itoa(children[i].Zipcode)
				return address
			}
		}
	case rank == region && address.Region == "":
		children := areaMap.RegionByPid[pid]
		for _, fragment := range arr {
			for i := range children {
				if !strings.Contains(children[i].Name, fragment) {
					continue
				}
				address.Region = children[i].Name
				address.RegionCode = children[i].Id
				address.PostCode = strconv.Itoa(children[i].Zipcode)
				return address
			}
		}
	}
	return address
}
package addr
import (
"encoding/json"
"fmt"
"testing"
"time"
)
// TestSmart runs Smart over sample addresses, prints each result as JSON and
// reports the total elapsed time. It asserts nothing about the parsed
// content; it fails only when a result cannot be marshalled.
func TestSmart(t *testing.T) {
	inputs := []string{
		"云南--红河州--建水县--临安镇翠屏农贸市场3号门",
		"云南红河州建水县临安镇翠屏农贸市场3号门",
	}
	start := time.Now()
	for _, in := range inputs {
		out, err := json.Marshal(Smart(in))
		if err != nil {
			// A bare return here would report the test as passed.
			t.Fatalf("marshal result for %q: %v", in, err)
		}
		fmt.Println(string(out))
	}
	fmt.Printf("time cost = %v\n", time.Since(start))
}
差异被折叠。
// Package areaMap 该文件是由go generate自动生成的,请勿直接修改代码!!!
// 如需更新请更新/data文件的数据源,然后在/generate下执行 make all
package areaMap
// ProvinceId is the value type of ProvinceById: the province's name and its
// parent id (0 for every entry in this file).
type ProvinceId struct {
Name string `json:"name"`
Pid int `json:"pid"`
}
// ProvincePid is the element type of the lists in ProvinceByPid: one child
// of a parent id, given by name and own id.
type ProvincePid struct {
Name string `json:"name"`
Id int `json:"id"`
}
// ProvinceName is the element type of the lists in ProvinceByName: a
// province with that name, given with its own id and parent id.
type ProvinceName struct {
Name string `json:"name"`
Id int `json:"id"`
Pid int `json:"pid"`
}
var ProvinceById = map[int]ProvinceId{
1:{Name:"北京",Pid:0},
2:{Name:"上海",Pid:0},
3:{Name:"天津",Pid:0},
4:{Name:"重庆",Pid:0},
5:{Name:"河北",Pid:0},
6:{Name:"山西",Pid:0},
7:{Name:"河南",Pid:0},
8:{Name:"辽宁",Pid:0},
9:{Name:"吉林",Pid:0},
10:{Name:"黑龙江",Pid:0},
11:{Name:"内蒙古",Pid:0},
12:{Name:"江苏",Pid:0},
13:{Name:"山东",Pid:0},
14:{Name:"安徽",Pid:0},
15:{Name:"浙江",Pid:0},
16:{Name:"福建",Pid:0},
17:{Name:"湖北",Pid:0},
18:{Name:"湖南",Pid:0},
19:{Name:"广东",Pid:0},
20:{Name:"广西",Pid:0},
21:{Name:"江西",Pid:0},
22:{Name:"四川",Pid:0},
23:{Name:"海南",Pid:0},
24:{Name:"贵州",Pid:0},
25:{Name:"云南",Pid:0},
26:{Name:"西藏",Pid:0},
27:{Name:"陕西",Pid:0},
28:{Name:"甘肃",Pid:0},
29:{Name:"青海",Pid:0},
30:{Name:"宁夏",Pid:0},
31:{Name:"新疆",Pid:0},
32:{Name:"台湾",Pid:0},
84:{Name:"钓鱼岛",Pid:0},
52993:{Name:"港澳",Pid:0},
}
var ProvinceByPid = map[int][]ProvincePid{
0:{{Name:"北京",Id:1},{Name:"上海",Id:2},{Name:"天津",Id:3},{Name:"重庆",Id:4},{Name:"河北",Id:5},{Name:"山西",Id:6},{Name:"河南",Id:7},{Name:"辽宁",Id:8},{Name:"吉林",Id:9},{Name:"黑龙江",Id:10},{Name:"内蒙古",Id:11},{Name:"江苏",Id:12},{Name:"山东",Id:13},{Name:"安徽",Id:14},{Name:"浙江",Id:15},{Name:"福建",Id:16},{Name:"湖北",Id:17},{Name:"湖南",Id:18},{Name:"广东",Id:19},{Name:"广西",Id:20},{Name:"江西",Id:21},{Name:"四川",Id:22},{Name:"海南",Id:23},{Name:"贵州",Id:24},{Name:"云南",Id:25},{Name:"西藏",Id:26},{Name:"陕西",Id:27},{Name:"甘肃",Id:28},{Name:"青海",Id:29},{Name:"宁夏",Id:30},{Name:"新疆",Id:31},{Name:"台湾",Id:32},{Name:"钓鱼岛",Id:84},{Name:"港澳",Id:52993}},
}
var ProvinceByName = map[string][]ProvinceName{
"河南":{{Name:"河南",Id:7,Pid:0}},
"安徽":{{Name:"安徽",Id:14,Pid:0}},
"江西":{{Name:"江西",Id:21,Pid:0}},
"陕西":{{Name:"陕西",Id:27,Pid:0}},
"港澳":{{Name:"港澳",Id:52993,Pid:0}},
"江苏":{{Name:"江苏",Id:12,Pid:0}},
"四川":{{Name:"四川",Id:22,Pid:0}},
"甘肃":{{Name:"甘肃",Id:28,Pid:0}},
"钓鱼岛":{{Name:"钓鱼岛",Id:84,Pid:0}},
"上海":{{Name:"上海",Id:2,Pid:0}},
"辽宁":{{Name:"辽宁",Id:8,Pid:0}},
"吉林":{{Name:"吉林",Id:9,Pid:0}},
"黑龙江":{{Name:"黑龙江",Id:10,Pid:0}},
"山东":{{Name:"山东",Id:13,Pid:0}},
"湖南":{{Name:"湖南",Id:18,Pid:0}},
"海南":{{Name:"海南",Id:23,Pid:0}},
"西藏":{{Name:"西藏",Id:26,Pid:0}},
"山西":{{Name:"山西",Id:6,Pid:0}},
"内蒙古":{{Name:"内蒙古",Id:11,Pid:0}},
"贵州":{{Name:"贵州",Id:24,Pid:0}},
"云南":{{Name:"云南",Id:25,Pid:0}},
"新疆":{{Name:"新疆",Id:31,Pid:0}},
"台湾":{{Name:"台湾",Id:32,Pid:0}},
"天津":{{Name:"天津",Id:3,Pid:0}},
"宁夏":{{Name:"宁夏",Id:30,Pid:0}},
"北京":{{Name:"北京",Id:1,Pid:0}},
"河北":{{Name:"河北",Id:5,Pid:0}},
"福建":{{Name:"福建",Id:16,Pid:0}},
"湖北":{{Name:"湖北",Id:17,Pid:0}},
"广西":{{Name:"广西",Id:20,Pid:0}},
"青海":{{Name:"青海",Id:29,Pid:0}},
"重庆":{{Name:"重庆",Id:4,Pid:0}},
"浙江":{{Name:"浙江",Id:15,Pid:0}},
"广东":{{Name:"广东",Id:19,Pid:0}},
}
差异被折叠。
差异被折叠。
{"1":{"name":"北京","pid":0,"zipcode":0},"10":{"name":"黑龙江","pid":0,"zipcode":0},"11":{"name":"内蒙古","pid":0,"zipcode":0},"12":{"name":"江苏","pid":0,"zipcode":0},"13":{"name":"山东","pid":0,"zipcode":0},"14":{"name":"安徽","pid":0,"zipcode":0},"15":{"name":"浙江","pid":0,"zipcode":0},"16":{"name":"福建","pid":0,"zipcode":0},"17":{"name":"湖北","pid":0,"zipcode":0},"18":{"name":"湖南","pid":0,"zipcode":0},"19":{"name":"广东","pid":0,"zipcode":0},"2":{"name":"上海","pid":0,"zipcode":0},"20":{"name":"广西","pid":0,"zipcode":0},"21":{"name":"江西","pid":0,"zipcode":0},"22":{"name":"四川","pid":0,"zipcode":0},"23":{"name":"海南","pid":0,"zipcode":0},"24":{"name":"贵州","pid":0,"zipcode":0},"25":{"name":"云南","pid":0,"zipcode":0},"26":{"name":"西藏","pid":0,"zipcode":0},"27":{"name":"陕西","pid":0,"zipcode":0},"28":{"name":"甘肃","pid":0,"zipcode":0},"29":{"name":"青海","pid":0,"zipcode":0},"3":{"name":"天津","pid":0,"zipcode":0},"30":{"name":"宁夏","pid":0,"zipcode":0},"31":{"name":"新疆","pid":0,"zipcode":0},"32":{"name":"台湾","pid":0,"zipcode":0},"4":{"name":"重庆","pid":0,"zipcode":0},"5":{"name":"河北","pid":0,"zipcode":0},"52993":{"name":"港澳","pid":0,"zipcode":0},"6":{"name":"山西","pid":0,"zipcode":0},"7":{"name":"河南","pid":0,"zipcode":0},"8":{"name":"辽宁","pid":0,"zipcode":0},"84":{"name":"钓鱼岛","pid":0,"zipcode":0},"9":{"name":"吉林","pid":0,"zipcode":0}}
\ No newline at end of file
This source diff could not be displayed because it is too large. You can view the blob instead.
package autoCode
import (
"bufio"
"encoding/json"
"fmt"
"io/ioutil"
"os"
"reflect"
"sort"
"strconv"
"strings"
)
// AreaInfoId is one record of a JSON data source as AutoAreaMap decodes it:
// the value stored under a division id.
type AreaInfoId struct {
	Name    string `json:"name"`    // division name
	Pid     int    `json:"pid"`     // id of the parent division
	Zipcode int    `json:"zipcode"` // postal code; not emitted for provinces
}
// AreaInfoPid is the entry AutoAreaMap collects per parent id when building
// the ByPid table: a child division identified by name and own id.
type AreaInfoPid struct {
	Name    string `json:"name"`    // division name
	Id      int    `json:"id"`      // id of the division itself
	Zipcode int    `json:"zipcode"` // postal code; not emitted for provinces
}
// AreaInfoName is the entry AutoAreaMap collects per division name when
// building the ByName table; several divisions may share one name.
type AreaInfoName struct {
	Name    string `json:"name"`    // division name
	Id      int    `json:"id"`      // id of the division itself
	Pid     int    `json:"pid"`     // id of the parent division
	Zipcode int    `json:"zipcode"` // postal code; not emitted for provinces
}
const (
	// filePath is the directory holding the JSON data sources; AutoAreaMap
	// reads the file named after each rank from it.
	filePath = "../data/"

	// packageName is the package name written into the generated header.
	packageName = "areaMap"

	// codePath is the directory the generated Go files are written to.
	codePath = "../areaMap/"
)

// ranks lists the administrative levels a lookup file is generated for.
var ranks = []string{"city", "province", "region"}
// AutoAreaMap 自动生成行政区划map
func AutoAreaMap() {
for _, rank := range ranks {
data, err := ioutil.ReadFile(filePath + rank)
if err != nil {
fmt.Println("读取json文件失败,行政等级为:"+rank+",请检查文件!---", err)
return
}
m := make(map[int]AreaInfoId)
err = json.Unmarshal(data, &m)
if err != nil {
fmt.Println("json序列化数据源失败!请检出数据!path:"+filePath+rank+"---", err)
return
}
// 让map数据根据key排序
var keys []int
for k := range m {
keys = append(keys, k)
}
sort.Ints(keys)
str := ""
// 构建package
str += "// Package " + packageName + " 该文件是由go generate自动生成的,请勿直接修改代码!!!\n"
str += "// 如需更新请更新/data文件的数据源,然后在/generate下执行 make all\n"
str += "package areaMap\n\n"
// 构建struct
str += "type " + strings.Title(rank) + "Id struct {\n"
str += "Name string `json:\"name\"`\n"
str += "Pid int `json:\"pid\"`\n"
if rank != "province" {
str += "Zipcode int `json:\"zipcode\"`\n"
}
str += "}\n\n"
str += "type " + strings.Title(rank) + "Pid struct {\n"
str += "Name string `json:\"name\"`\n"
str += "Id int `json:\"id\"`\n"
if rank != "province" {
str += "Zipcode int `json:\"zipcode\"`\n"
}
str += "}\n\n"
str += "type " + strings.Title(rank) + "Name struct {\n"
str += "Name string `json:\"name\"`\n"
str += "Id int `json:\"id\"`\n"
str += "Pid int `json:\"pid\"`\n"
if rank != "province" {
str += "Zipcode int `json:\"zipcode\"`\n"
}
str += "}\n\n"
// 为构建pid索引树创造条件
str1Arr := make(map[int][]interface{})
// 为构建name索引树创造条件
str2Arr := make(map[string][]interface{})
// 构建map
// str是构建根据行政id来生成的索引树
str += "var " + strings.Title(rank) + "ById = map[int]" + strings.Title(rank) + "Id{\n"
for _, key := range keys {
var infoPid AreaInfoPid
infoPid.Id = key
infoPid.Name = m[key].Name
infoPid.Zipcode = m[key].Zipcode
str1Arr[m[key].Pid] = append(str1Arr[m[key].Pid], infoPid)
var infoName AreaInfoName
infoName.Id = key
infoName.Pid = m[key].Pid
infoName.Name = m[key].Name
infoName.Zipcode = m[key].Zipcode
str2Arr[m[key].Name] = append(str2Arr[m[key].Name], infoName)
t := reflect.TypeOf(m[key])
v := reflect.ValueOf(m[key])
name := t.Field(0).Name
name1 := v.Field(0).String()
pid := t.Field(1).Name
pid1 := v.Field(1).Int()
if rank == "province" {
str += strconv.Itoa(key) + ":{" + name + ":\"" + name1 + "\"," + pid + ":" + strconv.Itoa(int(pid1)) + "},\n"
continue
}
zipCode := t.Field(2).Name
zipCode1 := v.Field(2).Int()
str += strconv.Itoa(key) + ":{" + name + ":\"" + name1 + "\"," + pid + ":" + strconv.Itoa(int(pid1)) + "," + zipCode + ":" + strconv.Itoa(int(zipCode1)) + "},\n"
}
str += "}\n\n"
// str1是构建根据行政父id(pid)来生成的索引树
str1 := "var " + strings.Title(rank) + "ByPid = map[int][]" + strings.Title(rank) + "Pid{\n"
for key, value := range str1Arr {
str1 += strconv.Itoa(key) + ":{"
for _, value2 := range value {
t := reflect.TypeOf(value2)
v := reflect.ValueOf(value2)
name := t.Field(0).Name
name1 := v.Field(0).String()
id := t.Field(1).Name
id1 := v.Field(1).Int()
if rank == "province" {
str1 += "{" + name + ":\"" + name1 + "\"," + id + ":" + strconv.Itoa(int(id1)) + "},"
continue
}
zipCode := t.Field(2).Name
zipCode1 := v.Field(2).Int()
str1 += "{" + name + ":\"" + name1 + "\"," + id + ":" + strconv.Itoa(int(id1)) + "," + zipCode + ":" + strconv.Itoa(int(zipCode1)) + "},"
}
str1 = strings.TrimRight(str1, ",")
str1 += "},\n"
}
str1 += "}\n\n"
// str2是构建根据地名(name)来生成的索引树
str2 := "var " + strings.Title(rank) + "ByName = map[string][]" + strings.Title(rank) + "Name{\n"
for key, value := range str2Arr {
str2 += "\"" + key + "\":{"
for _, value2 := range value {
t := reflect.TypeOf(value2)
v := reflect.ValueOf(value2)
name := t.Field(0).Name
name1 := v.Field(0).String()
id := t.Field(1).Name
id1 := v.Field(1).Int()
pid := t.Field(2).Name
pid1 := v.Field(2).Int()
if rank == "province" {
str2 += "{" + name + ":\"" + name1 + "\"," + id + ":" + strconv.Itoa(int(id1)) + "," + pid + ":" + strconv.Itoa(int(pid1)) + "},"
continue
}
zipCode := t.Field(3).Name
zipCode1 := v.Field(3).Int()
str2 += "{" + name + ":\"" + name1 + "\"," + id + ":" + strconv.Itoa(int(id1)) + "," + pid + ":" + strconv.Itoa(int(pid1)) + "," + zipCode + ":" + strconv.Itoa(int(zipCode1)) + "},"
}
str2 = strings.TrimRight(str2, ",")
str2 += "},\n"
}
str2 += "}\n"
str = str + str1 + str2
// 尝试创建此路径
uploadDir := codePath
mkdirErr := os.MkdirAll(uploadDir, os.ModePerm)
if mkdirErr != nil {
fmt.Println(mkdirErr)
}
// 打开文件
file, err := os.OpenFile(codePath+rank+".go", os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0666)
if err != nil {
fmt.Println("文件打开失败", err)
}
//写入文件时,使用带缓存的 *Writer
write := bufio.NewWriter(file)
_, err = write.WriteString(str)
//Flush将缓存的文件真正写入到文件中
err = write.Flush()
//及时关闭file句柄
func(file *os.File) {
err := file.Close()
if err != nil {
fmt.Println("关闭file句柄失败!,可能回导致内存泄漏,请care一下!---", err)
}
}(file)
}
}
package main
import "gitlab.jxhh.com/zhaoxueru/address.git/generate/autoCode"
// main runs the generator: it calls autoCode.AutoAreaMap and exits. The
// directive below lets `go generate` trigger it through `go run main.go`.
//
//go:generate go run main.go
func main() {
autoCode.AutoAreaMap()
}
module gitlab.jxhh.com/zhaoxueru/address.git
go 1.16
github.com/pupuk/addr v0.0.2 h1:JXNLsvnoQMArYTLah3I1XMcGvp7NPMSuJL6ScyOtB9g=
github.com/pupuk/addr v0.0.2/go.mod h1:vuyWWCeWTpXoNfZWM6Agg0XAYg7AAqVXMvGk58SW0b0=
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论