Добавлено: загрузка распарсенного расписания в БД

Изменено: почти адекватный парсинг расписания преподавателей
This commit is contained in:
far-galaxy 2023-03-07 12:09:20 +04:00
parent fef468f695
commit 160e08a52a
6 changed files with 120 additions and 22 deletions

View File

@ -1,5 +1,7 @@
package database
import "time"
// User is a database record mapped by xorm; the only column visible here
// is its identifier.
type User struct {
// L9Id is the primary key (marked with the xorm `pk` tag).
L9Id int64 `xorm:"pk"`
}
@ -10,3 +12,22 @@ type TgUser struct {
Name string
PosTag string
}
// Group is the xorm-mapped record for a group, registered with the engine
// through Sync.
//
// The name fields are exported on purpose: xorm maps only exported struct
// fields to columns, so unexported fields would never be created or stored.
type Group struct {
	// GroupId is the primary key (marked with the xorm `pk` tag).
	GroupId int64 `xorm:"pk"`
	// GroupName is the name of the group.
	GroupName string
	// SpecName is the speciality name — presumably the same text the
	// schedule parser reads from the page header; confirm against the caller.
	SpecName string
}
// Lesson is the xorm-mapped record for a single lesson of a single group.
// The schedule uploader writes one such row per (sub-lesson, group) pair.
type Lesson struct {
// LessonId is the auto-incremented primary key.
LessonId int64 `xorm:"pk autoincr"`
// Type is the lesson kind tag copied from the parsed sub-lesson
// (the parser uses "lect", "lab", "pract" and "other").
Type string
// Name is the lesson title copied from the parsed sub-lesson.
Name string
// GroupId is the ID of the group this row belongs to.
GroupId int64
// Begin and End are copied from the parsed lesson's Begin and End values.
Begin time.Time
End time.Time
// TeacherId is the ID of the teacher; the parser stores 0 when it cannot
// read the ID from the page.
TeacherId int64
// Place is the location text copied from the parsed sub-lesson.
Place string
// Comment is the free-form comment copied from the parsed sub-lesson.
Comment string
// SubGroup is the subgroup marker copied from the parsed sub-lesson.
SubGroup string
}

View File

@ -18,7 +18,7 @@ func Connect(user, pass, db string) *xorm.Engine {
engine.ShowSQL(true)
engine.SetMapper(names.SameMapper{})
err = engine.Sync(&User{}, &TgUser{})
err = engine.Sync(&User{}, &TgUser{}, &Group{}, &Lesson{})
if err != nil {
log.Fatal(err)
}

View File

@ -0,0 +1,30 @@
package ssau_parser
import (
	"log"

	"git.l9labs.ru/anufriev.g.a/l9_stud_bot/modules/database"
	"xorm.io/xorm"
)
// uploadShedule stores every parsed lesson of sh in the database.
//
// Each sub-lesson is written once per group ID it carries, so a sub-lesson
// shared by several groups yields one row per group, and a sub-lesson with
// no group IDs yields no rows. A failed insert is logged and the upload
// continues with the remaining rows.
func uploadShedule(db *xorm.Engine, sh Shedule) {
	for _, line := range sh.Lessons {
		for _, lesson := range line {
			for _, subLesson := range lesson.SubLessons {
				for _, groupId := range subLesson.GroupId {
					// A fresh value per row keeps LessonId at zero, so the
					// database assigns the auto-increment key every time.
					pair := database.Lesson{
						Begin:     lesson.Begin,
						End:       lesson.End,
						Type:      subLesson.Type,
						Name:      subLesson.Name,
						GroupId:   groupId,
						TeacherId: subLesson.TeacherId,
						Place:     subLesson.Place,
						Comment:   subLesson.Comment,
						SubGroup:  subLesson.SubGroup,
					}
					if _, err := db.InsertOne(pair); err != nil {
						log.Printf("uploadShedule: inserting lesson %q for group %d: %v", pair.Name, groupId, err)
					}
				}
			}
		}
	}
}

View File

@ -8,6 +8,7 @@ import (
"log"
"net/http"
"net/url"
"strconv"
"strings"
"github.com/PuerkitoBio/goquery"
@ -84,24 +85,36 @@ func FindInRasp(query string) (RaspItems, error) {
return list, nil
}
func Connect(uri string, week int) (*goquery.Document, error) {
// Connect to ssau.ru/rasp
// Returns goquery.Document, is shedule a group shedule and its ID
func Connect(uri string, week int) (*goquery.Document, bool, int64, error) {
client := http.Client{}
req, err := http.NewRequest("GET", fmt.Sprintf("https://ssau.ru%s&selectedWeek=%d", uri, week), nil)
if err != nil {
return nil, err
return nil, false, 0, err
}
req.Header.Add("User-Agent", "Mozilla/5.0")
resp, err := client.Do(req)
if err != nil {
return nil, err
return nil, false, 0, err
}
doc, err := goquery.NewDocumentFromReader(resp.Body)
if err != nil {
return nil, err
return nil, false, 0, err
}
return doc, nil
var sheduleId int64
var isGroup bool
sheduleId, err = strconv.ParseInt(uri[14:], 0, 64)
if err != nil {
return nil, false, 0, err
}
isGroup = strings.Contains(uri, "group")
return doc, isGroup, sheduleId, nil
}

View File

@ -19,18 +19,22 @@ type SubLesson struct {
Type string
Name string
Place string
TeacherID int64
TeacherId int64
GroupId []int64
Comment string
SubGroup string
}
// Shedule is the result of parsing one schedule page.
type Shedule struct {
	// IsGroup tells whether this is a group schedule.
	// NOTE(review): presumably set from the isGroup argument of Parse — confirm.
	IsGroup bool
	// SheduleId is the numeric ID of the schedule.
	// NOTE(review): presumably set from the sheduleId argument of Parse — confirm.
	SheduleId int64
	// SpecName is the speciality name.
	// NOTE(review): presumably the page's info-block description — confirm.
	SpecName string
	// Week is the week number of the schedule.
	Week int
	// Lessons holds the parsed lessons as a slice of rows; the uploader walks
	// every row and every lesson in it.
	Lessons [][]Lesson
}
func Parse(doc *goquery.Document) (*Shedule, error) {
// Parse goquery shedule site
func Parse(doc *goquery.Document, isGroup bool, sheduleId int64) (*Shedule, error) {
spec := doc.Find(".info-block__description div").First().Text()[1:]
log.Println(spec)
@ -48,7 +52,7 @@ func Parse(doc *goquery.Document) (*Shedule, error) {
var lessons [][]SubLesson
doc.Find(".schedule__item:not(.schedule__head)").Each(func(i int, s *goquery.Selection) {
sl := ParseSubLesson(s)
sl := ParseSubLesson(s, isGroup, sheduleId)
lessons = append(lessons, sl)
})
@ -82,7 +86,8 @@ func Parse(doc *goquery.Document) (*Shedule, error) {
var types = [4]string{"lect", "lab", "pract", "other"}
func ParseSubLesson(s *goquery.Selection) []SubLesson {
// Parse shedule item
func ParseSubLesson(s *goquery.Selection, isGroup bool, sheduleId int64) []SubLesson {
var subs []SubLesson
s.Find(".schedule__lesson").Each(func(j int, l *goquery.Selection) {
var sublesson SubLesson
@ -98,13 +103,31 @@ func ParseSubLesson(s *goquery.Selection) []SubLesson {
}
sublesson.Type = types[type_idx-1]
teacher := l.Find(".schedule__teacher a").AttrOr("href", "/rasp?staffId=")
teacherId, err := strconv.ParseInt(teacher[14:], 0, 64)
if err != nil {
teacherId = 0
}
sublesson.TeacherID = teacherId
var teacherId int64
var groupId []int64
if isGroup {
teacher := l.Find(".schedule__teacher a").AttrOr("href", "/rasp?staffId=")
teacherId, err = strconv.ParseInt(teacher[14:], 0, 64)
if err != nil {
teacherId = 0
}
groupId = append(groupId, sheduleId)
} else {
teacherId = sheduleId
l.Find("a.schedule__group").Each(func(k int, gr *goquery.Selection) {
id, err := strconv.ParseInt(gr.AttrOr("href", "/rasp?groupId=")[14:], 0, 64)
if err != nil {
teacherId = 0
}
groupId = append(groupId, id)
})
}
sublesson.TeacherId = teacherId
sublesson.GroupId = groupId
// NOTE: parsing subgroups is awkward here: on teacher schedules the subgroup number is
// embedded in the group links instead of the element intended for it.
subgroup := l.Find(".schedule__groups span").First().Text()
if subgroup == " " {
subgroup = ""

View File

@ -2,7 +2,11 @@ package ssau_parser
import (
"log"
"os"
"testing"
"git.l9labs.ru/anufriev.g.a/l9_stud_bot/modules/database"
"github.com/joho/godotenv"
)
func TestFindInRasp(t *testing.T) {
@ -19,7 +23,7 @@ func TestConnect(t *testing.T) {
t.Error(err)
}
uri := list[0].Url
_, err = Connect(uri, 3)
_, _, _, err = Connect(uri, 3)
if err != nil {
t.Error(err)
}
@ -31,12 +35,19 @@ func TestParse(t *testing.T) {
t.Error(err)
}
uri := list[0].Url
doc, err := Connect(uri, 3)
doc, is, gr, err := Connect(uri, 5)
if err != nil {
t.Error(err)
}
_, err = Parse(doc)
shedule, err := Parse(doc, is, gr)
if err != nil {
t.Error(err)
}
if err := godotenv.Load(); err != nil {
log.Fatal("No .env file found")
}
engine := database.Connect(os.Getenv("MYSQL_USER"), os.Getenv("MYSQL_PASS"), os.Getenv("MYSQL_DB"))
uploadShedule(engine, *shedule)
}