Добавлено: загрузка распарсенного расписания в БД
Изменено: почти адекватный парсинг расписания преподавателей
This commit is contained in:
parent
fef468f695
commit
160e08a52a
|
@ -1,5 +1,7 @@
|
|||
package database
|
||||
|
||||
import "time"
|
||||
|
||||
// User is a database record identified by its L9Id.
type User struct {
|
||||
// L9Id is the primary key of the record (xorm "pk" tag).
L9Id int64 `xorm:"pk"`
|
||||
}
|
||||
|
@ -10,3 +12,22 @@ type TgUser struct {
|
|||
Name string
|
||||
PosTag string
|
||||
}
|
||||
|
||||
// Group is the database record for a group, keyed by GroupId.
//
// Every field is exported on purpose: xorm maps only exported struct fields
// to table columns, so unexported names would never reach the synced table.
type Group struct {
	GroupId   int64 `xorm:"pk"` // primary key
	GroupName string
	SpecName  string
}
|
||||
|
||||
// Lesson is the database record for a single lesson entry.
// Field order is kept stable; only the declaration grouping differs.
type Lesson struct {
	LessonId int64 `xorm:"pk autoincr"` // auto-incremented primary key

	Type, Name string
	GroupId    int64

	Begin, End time.Time // start and end of the lesson
	TeacherId  int64

	Place, Comment, SubGroup string
}
|
||||
|
|
|
@ -18,7 +18,7 @@ func Connect(user, pass, db string) *xorm.Engine {
|
|||
engine.ShowSQL(true)
|
||||
engine.SetMapper(names.SameMapper{})
|
||||
|
||||
err = engine.Sync(&User{}, &TgUser{})
|
||||
err = engine.Sync(&User{}, &TgUser{}, &Group{}, &Lesson{})
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
|
|
|
@ -0,0 +1,30 @@
|
|||
package ssau_parser
|
||||
|
||||
import (
|
||||
"git.l9labs.ru/anufriev.g.a/l9_stud_bot/modules/database"
|
||||
"xorm.io/xorm"
|
||||
)
|
||||
|
||||
func uploadShedule(db *xorm.Engine, sh Shedule) {
|
||||
for _, line := range sh.Lessons {
|
||||
for _, lesson := range line {
|
||||
var pair database.Lesson
|
||||
for _, subLesson := range lesson.SubLessons {
|
||||
pair = database.Lesson{
|
||||
Begin: lesson.Begin,
|
||||
End: lesson.End,
|
||||
Type: subLesson.Type,
|
||||
Name: subLesson.Name,
|
||||
TeacherId: subLesson.TeacherId,
|
||||
Place: subLesson.Place,
|
||||
Comment: subLesson.Comment,
|
||||
SubGroup: subLesson.SubGroup,
|
||||
}
|
||||
for _, groupId := range subLesson.GroupId {
|
||||
pair.GroupId = groupId
|
||||
db.InsertOne(pair)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -8,6 +8,7 @@ import (
|
|||
"log"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/PuerkitoBio/goquery"
|
||||
|
@ -84,24 +85,36 @@ func FindInRasp(query string) (RaspItems, error) {
|
|||
return list, nil
|
||||
}
|
||||
|
||||
func Connect(uri string, week int) (*goquery.Document, error) {
|
||||
// Connect to ssau.ru/rasp
|
||||
// Returns goquery.Document, is shedule a group shedule and its ID
|
||||
func Connect(uri string, week int) (*goquery.Document, bool, int64, error) {
|
||||
client := http.Client{}
|
||||
|
||||
req, err := http.NewRequest("GET", fmt.Sprintf("https://ssau.ru%s&selectedWeek=%d", uri, week), nil)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
return nil, false, 0, err
|
||||
}
|
||||
req.Header.Add("User-Agent", "Mozilla/5.0")
|
||||
|
||||
resp, err := client.Do(req)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
return nil, false, 0, err
|
||||
}
|
||||
|
||||
doc, err := goquery.NewDocumentFromReader(resp.Body)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
return nil, false, 0, err
|
||||
}
|
||||
|
||||
return doc, nil
|
||||
var sheduleId int64
|
||||
var isGroup bool
|
||||
|
||||
sheduleId, err = strconv.ParseInt(uri[14:], 0, 64)
|
||||
if err != nil {
|
||||
return nil, false, 0, err
|
||||
}
|
||||
|
||||
isGroup = strings.Contains(uri, "group")
|
||||
|
||||
return doc, isGroup, sheduleId, nil
|
||||
}
|
||||
|
|
|
@ -19,18 +19,22 @@ type SubLesson struct {
|
|||
Type string
|
||||
Name string
|
||||
Place string
|
||||
TeacherID int64
|
||||
TeacherId int64
|
||||
GroupId []int64
|
||||
Comment string
|
||||
SubGroup string
|
||||
}
|
||||
|
||||
type Shedule struct {
|
||||
SpecName string
|
||||
Week int
|
||||
Lessons [][]Lesson
|
||||
IsGroup bool
|
||||
SheduleId int64
|
||||
SpecName string
|
||||
Week int
|
||||
Lessons [][]Lesson
|
||||
}
|
||||
|
||||
func Parse(doc *goquery.Document) (*Shedule, error) {
|
||||
// Parse goquery shedule site
|
||||
func Parse(doc *goquery.Document, isGroup bool, sheduleId int64) (*Shedule, error) {
|
||||
spec := doc.Find(".info-block__description div").First().Text()[1:]
|
||||
log.Println(spec)
|
||||
|
||||
|
@ -48,7 +52,7 @@ func Parse(doc *goquery.Document) (*Shedule, error) {
|
|||
|
||||
var lessons [][]SubLesson
|
||||
doc.Find(".schedule__item:not(.schedule__head)").Each(func(i int, s *goquery.Selection) {
|
||||
sl := ParseSubLesson(s)
|
||||
sl := ParseSubLesson(s, isGroup, sheduleId)
|
||||
lessons = append(lessons, sl)
|
||||
})
|
||||
|
||||
|
@ -82,7 +86,8 @@ func Parse(doc *goquery.Document) (*Shedule, error) {
|
|||
|
||||
var types = [4]string{"lect", "lab", "pract", "other"}
|
||||
|
||||
func ParseSubLesson(s *goquery.Selection) []SubLesson {
|
||||
// Parse shedule item
|
||||
func ParseSubLesson(s *goquery.Selection, isGroup bool, sheduleId int64) []SubLesson {
|
||||
var subs []SubLesson
|
||||
s.Find(".schedule__lesson").Each(func(j int, l *goquery.Selection) {
|
||||
var sublesson SubLesson
|
||||
|
@ -98,13 +103,31 @@ func ParseSubLesson(s *goquery.Selection) []SubLesson {
|
|||
}
|
||||
sublesson.Type = types[type_idx-1]
|
||||
|
||||
teacher := l.Find(".schedule__teacher a").AttrOr("href", "/rasp?staffId=")
|
||||
teacherId, err := strconv.ParseInt(teacher[14:], 0, 64)
|
||||
if err != nil {
|
||||
teacherId = 0
|
||||
}
|
||||
sublesson.TeacherID = teacherId
|
||||
var teacherId int64
|
||||
var groupId []int64
|
||||
|
||||
if isGroup {
|
||||
teacher := l.Find(".schedule__teacher a").AttrOr("href", "/rasp?staffId=")
|
||||
teacherId, err = strconv.ParseInt(teacher[14:], 0, 64)
|
||||
if err != nil {
|
||||
teacherId = 0
|
||||
}
|
||||
groupId = append(groupId, sheduleId)
|
||||
} else {
|
||||
teacherId = sheduleId
|
||||
l.Find("a.schedule__group").Each(func(k int, gr *goquery.Selection) {
|
||||
id, err := strconv.ParseInt(gr.AttrOr("href", "/rasp?groupId=")[14:], 0, 64)
|
||||
if err != nil {
|
||||
teacherId = 0
|
||||
}
|
||||
groupId = append(groupId, id)
|
||||
})
|
||||
}
|
||||
sublesson.TeacherId = teacherId
|
||||
sublesson.GroupId = groupId
|
||||
|
||||
// Я в рот ебал парсить это расписание, потому что у преподов решили номера подгрупп пихать
|
||||
// в ссылки на группу, а не в предназначенный для этого элемент
|
||||
subgroup := l.Find(".schedule__groups span").First().Text()
|
||||
if subgroup == " " {
|
||||
subgroup = ""
|
||||
|
|
|
@ -2,7 +2,11 @@ package ssau_parser
|
|||
|
||||
import (
|
||||
"log"
|
||||
"os"
|
||||
"testing"
|
||||
|
||||
"git.l9labs.ru/anufriev.g.a/l9_stud_bot/modules/database"
|
||||
"github.com/joho/godotenv"
|
||||
)
|
||||
|
||||
func TestFindInRasp(t *testing.T) {
|
||||
|
@ -19,7 +23,7 @@ func TestConnect(t *testing.T) {
|
|||
t.Error(err)
|
||||
}
|
||||
uri := list[0].Url
|
||||
_, err = Connect(uri, 3)
|
||||
_, _, _, err = Connect(uri, 3)
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
|
@ -31,12 +35,19 @@ func TestParse(t *testing.T) {
|
|||
t.Error(err)
|
||||
}
|
||||
uri := list[0].Url
|
||||
doc, err := Connect(uri, 3)
|
||||
doc, is, gr, err := Connect(uri, 5)
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
_, err = Parse(doc)
|
||||
shedule, err := Parse(doc, is, gr)
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
|
||||
if err := godotenv.Load(); err != nil {
|
||||
log.Fatal("No .env file found")
|
||||
}
|
||||
|
||||
engine := database.Connect(os.Getenv("MYSQL_USER"), os.Getenv("MYSQL_PASS"), os.Getenv("MYSQL_DB"))
|
||||
uploadShedule(engine, *shedule)
|
||||
}
|
||||
|
|
Reference in New Issue