diff --git a/modules/ssau_parser/connecter.go b/modules/ssau_parser/connecter.go new file mode 100644 index 0000000..cb2614d --- /dev/null +++ b/modules/ssau_parser/connecter.go @@ -0,0 +1,146 @@ +package ssau_parser + +import ( + "encoding/json" + "errors" + "fmt" + "io" + "net/http" + "net/url" + "strconv" + "strings" + + "github.com/PuerkitoBio/goquery" +) + +// Результаты поиска +type SearchResults []struct { + Id int64 + Url string + Text string +} + +// Страница с расписанием и служебными хвостами +type Page struct { + ID int64 + IsGroup bool + Week int + Doc *goquery.Document +} + +// Адрес основного сайта (прод или тестовый) +var headURL = "https://ssau.ru" + +// Поиск расписания группы или преподавателя через ssau.ru/rasp/search +func SearchInRasp(query string) (SearchResults, error) { + client := http.Client{} + + // Сначала заходим на сам сайт и получаем токены, чтобы нас посчитали человеком + req, err := http.NewRequest("GET", headURL+"/rasp", nil) + if err != nil { + return nil, err + } + req.Header.Add("User-Agent", "Mozilla/5.0") + + resp, err := client.Do(req) + if err != nil { + return nil, err + } + + doc, err := goquery.NewDocumentFromReader(resp.Body) + if err != nil { + return nil, err + } + csrf, exists := doc.Find("meta[name='csrf-token']").Attr("content") + if !exists { + return nil, errors.New("missed csrf") + } + + parm := url.Values{} + parm.Add("text", query) + + // Теперь можно обращаться к подобию API + req, err = http.NewRequest("POST", headURL+"/rasp/search", strings.NewReader(parm.Encode())) + if err != nil { + return nil, err + } + + for _, cookie := range resp.Cookies() { + req.AddCookie(cookie) + } + req.Header.Add("Content-Type", "application/x-www-form-urlencoded") + req.Header.Add("User-Agent", "Mozilla/5.0") + req.Header.Add("Accept", "application/json") + req.Header.Add("X-CSRF-TOKEN", csrf) + + resp, err = client.Do(req) + if err != nil { + return nil, err + } + + var list SearchResults + if resp.StatusCode == 200 { + body, err := io.ReadAll(resp.Body) + if err != nil { + return nil, err + } + + if err := json.Unmarshal(body, &list); err != nil { + return nil, err + } + + } else { + return nil, fmt.Errorf("responce: %s", resp.Status) + } + + return list, nil +} + +// Загрузка страницы с расписанием из ssau.ru/rasp по URI и номеру недели (в семестре) +func DownloadShedule(uri string, week int) (Page, error) { + var page Page + var err error + + page.ID, err = strconv.ParseInt(uri[14:], 0, 64) + if err != nil { + return page, err + } + page.IsGroup = strings.Contains(uri, "group") + page.Week = week + + client := http.Client{} + req, err := http.NewRequest("GET", fmt.Sprintf("%s%s&selectedWeek=%d", headURL, uri, week), nil) + if err != nil { + return page, err + } + req.Header.Add("User-Agent", "Mozilla/5.0") + + resp, err := client.Do(req) + if err != nil { + return page, err + } + + page.Doc, err = goquery.NewDocumentFromReader(resp.Body) + if err != nil { + return page, err + } + + return page, nil +} + +// Загрузка страницы с расписанием из ssau.ru/rasp по ID и номеру недели (в семестре) +func DownloadSheduleById(id int64, isGroup bool, week int) (Page, error) { + uri := GenerateUri(id, isGroup) + return DownloadShedule(uri, week) +} + +// Создать URI по ID и условию группа/преподаватель +func GenerateUri(id int64, isGroup bool) string { + var uri string + if isGroup { + uri = fmt.Sprintf("/rasp?groupId=%d", id) + } else { + uri = fmt.Sprintf("/rasp?staffId=%d", id) + } + return uri +} diff --git a/modules/ssau_parser/database.go b/modules/ssau_parser/database.go index 38af74e..d251a9b 100644 --- a/modules/ssau_parser/database.go +++ b/modules/ssau_parser/database.go @@ -1,7 +1,6 @@ package ssau_parser import ( - "fmt" "log" "strings" @@ -9,7 +8,7 @@ import ( "xorm.io/xorm" ) -func UploadShedule(db *xorm.Engine, sh Shedule) error { +func UploadShedule(db *xorm.Engine, sh WeekShedule) error { err := addGroupOrTeacher(db, sh) if err != nil { return err @@ -19,7 +18,7 @@ func UploadShedule(db *xorm.Engine, sh Shedule) error { for _, line := range sh.Lessons { for _, lesson := range line { var pair database.Lesson - for _, subLesson := range lesson.SubLessons { + for _, subLesson := range lesson.Lessons { pair = database.Lesson{ Begin: lesson.Begin, End: lesson.End, @@ -35,11 +34,11 @@ func UploadShedule(db *xorm.Engine, sh Shedule) error { if !exists && subLesson.TeacherId != 0 { uri := GenerateUri(subLesson.TeacherId, true) - doc, _, _, err := Connect(uri, sh.Week) + doc, _, _, err := DownloadShedule(uri, sh.Week) if err != nil { return err } - var gr Shedule + var gr WeekShedule gr.IsGroup = false gr.SheduleId = subLesson.TeacherId GetSheduleInfo(doc, &gr) @@ -56,11 +55,11 @@ func UploadShedule(db *xorm.Engine, sh Shedule) error { if !exists { uri := GenerateUri(groupId, false) - doc, _, _, err := Connect(uri, sh.Week) + doc, _, _, err := DownloadShedule(uri, sh.Week) if err != nil { return err } - var gr Shedule + var gr WeekShedule gr.IsGroup = true gr.SheduleId = groupId GetSheduleInfo(doc, &gr) @@ -90,16 +89,6 @@ func UploadShedule(db *xorm.Engine, sh Shedule) error { return nil } -func GenerateUri(id int64, isTeacher bool) string { - var uri string - if isTeacher { - uri = fmt.Sprintf("/rasp?staffId=%d", id) - } else { - uri = fmt.Sprintf("/rasp?groupId=%d", id) - } - return uri -} - func isGroupExists(db *xorm.Engine, groupId int64) (bool, error) { var exists []database.Group err := db.Find(&exists, database.Group{GroupId: groupId}) @@ -120,7 +109,7 @@ func isTeacherExists(db *xorm.Engine, teacherId int64) (bool, error) { return len(exists) == 1, nil } -func addGroupOrTeacher(db *xorm.Engine, sh Shedule) error { +func addGroupOrTeacher(db *xorm.Engine, sh WeekShedule) error { if sh.IsGroup { exists, err := isGroupExists(db, sh.SheduleId) if err != nil { diff --git a/modules/ssau_parser/parser.go b/modules/ssau_parser/parser.go index f5ee16f..b5deb43 100644 --- a/modules/ssau_parser/parser.go +++ b/modules/ssau_parser/parser.go @@ -1,126 +1,179 @@ package ssau_parser import ( - "encoding/json" - "errors" - "fmt" - "io" - "log" - "net/http" - "net/url" "strconv" "strings" + "time" "github.com/PuerkitoBio/goquery" ) -type RaspList struct { - Items []RaspItems +type Pair struct { + Begin time.Time + End time.Time + NumInShedule int + Lessons []Lesson } -type RaspItems []struct { - Id int64 - Url string - Text string +type Lesson struct { + Type string + Name string + Place string + TeacherId int64 + GroupId []int64 + Comment string + SubGroup string } -func FindInRasp(query string) (RaspItems, error) { - client := http.Client{} +type WeekShedule struct { + IsGroup bool + SheduleId int64 + GroupName string + SpecName string + Week int + Lessons [][]Pair +} - req, err := http.NewRequest("GET", "https://ssau.ru/rasp", nil) - if err != nil { - return nil, err +// Получить полный номер группы и название специальности +// TODO: проверить, как это с преподами работает +func GetSheduleInfo(doc *goquery.Document, sh *WeekShedule) { + spec := doc.Find(".info-block__description div").First().Text() + if spec != "" { + spec = spec[1:] } - req.Header.Add("User-Agent", "Mozilla/5.0") + sh.SpecName = spec + sh.GroupName = strings.TrimSpace(doc.Find(".info-block__title").First().Text()) - resp, err := client.Do(req) - if err != nil { - return nil, err - } +} - doc, err := goquery.NewDocumentFromReader(resp.Body) - if err != nil { - return nil, err - } - csrf, exists := doc.Find("meta[name='csrf-token']").Attr("content") - if !exists { - return nil, errors.New("missed csrf") - } +// Соотнесение часа начала пары с его порядковым номером +var hourMap = map[int]int{8: 0, 9: 1, 11: 2, 13: 3, 15: 4, 17: 5, 18: 6, 20: 7} - parm := url.Values{} - parm.Add("text", query) - req, err = http.NewRequest("POST", "https://ssau.ru/rasp/search", strings.NewReader(parm.Encode())) - if err != nil { - return nil, err - } +// Парсинг страницы с расписанием +func Parse(p Page) (*WeekShedule, error) { + var sh WeekShedule + doc := p.Doc + GetSheduleInfo(doc, &sh) - for _, cookie := range resp.Cookies() { - req.AddCookie(cookie) - } - req.Header.Add("Content-Type", "application/x-www-form-urlencoded") - req.Header.Add("User-Agent", "Mozilla/5.0") - req.Header.Add("Accept", "application/json") - req.Header.Add("X-CSRF-TOKEN", csrf) + var raw_dates []string + doc.Find(".schedule__head-date").Each(func(i int, s *goquery.Selection) { + sh_date := s.Text() + raw_dates = append(raw_dates, sh_date) + }) - resp, err = client.Do(req) - if err != nil { - return nil, err - } + var raw_times []string + doc.Find(".schedule__time-item").Each(func(i int, s *goquery.Selection) { + sh_time := s.Text() + "+04" + raw_times = append(raw_times, sh_time) + }) - var list RaspItems - if resp.StatusCode == 200 { - body, err := io.ReadAll(resp.Body) + var lessons [][]Lesson + doc.Find(".schedule__item:not(.schedule__head)").Each(func(i int, s *goquery.Selection) { + sl := ParseSubLesson(s, p.IsGroup, p.ID) + lessons = append(lessons, sl) + }) + + var shedule [][]Pair + var firstNum int + + for t := 0; t < len(raw_times); t += 2 { + if t == 0 { + begin, err := time.Parse(" 15:04 -07", raw_times[t]) + if err != nil { + return nil, err + } + firstNum, _ = hourMap[begin.Hour()] + + } + var time_line []Pair + for d, date := range raw_dates { + begin_raw := date + raw_times[t] + begin, err := time.Parse(" 02.01.2006 15:04 -07", begin_raw) + if err != nil { + return nil, err + } + end_raw := date + raw_times[t+1] + end, err := time.Parse(" 02.01.2006 15:04 -07", end_raw) + if err != nil { + return nil, err + } + idx := (len(raw_dates))*t/2 + d + lesson := Pair{ + Begin: begin, + End: end, + NumInShedule: t/2 + firstNum, + Lessons: lessons[idx], + } + time_line = append(time_line, lesson) + } + shedule = append(shedule, time_line) + } + sh.IsGroup = p.IsGroup + sh.SheduleId = p.ID + sh.Week = p.Week + sh.Lessons = shedule + return &sh, nil +} + +var types = [4]string{"lect", "lab", "pract", "other"} + +// Парсинг занятия +func ParseSubLesson(s *goquery.Selection, isGroup bool, sheduleId int64) []Lesson { + var subs []Lesson + s.Find(".schedule__lesson").Each(func(j int, l *goquery.Selection) { + var sublesson Lesson + + name := l.Find("div.schedule__discipline").First() + sublesson.Name = name.Text()[1:] + l_type := name.AttrOr("class", "lesson-color-type-4") + t := strings.Split(l_type, " ") + l_type = t[len(t)-1] + type_idx, err := strconv.ParseInt(l_type[len(l_type)-1:], 0, 8) if err != nil { - log.Fatal(err) + type_idx = 4 } + sublesson.Type = types[type_idx-1] - if err := json.Unmarshal(body, &list); err != nil { - return nil, err + var teacherId int64 + var groupId []int64 + + if isGroup { + teacher := l.Find(".schedule__teacher a").AttrOr("href", "/rasp?staffId=") + teacherId, err = strconv.ParseInt(teacher[14:], 0, 64) + if err != nil { + teacherId = 0 + } + groupId = append(groupId, sheduleId) + } else { + teacherId = sheduleId + l.Find("a.schedule__group").Each(func(k int, gr *goquery.Selection) { + id, err := strconv.ParseInt(gr.AttrOr("href", "/rasp?groupId=")[14:], 0, 64) + if err != nil { + teacherId = 0 + } + groupId = append(groupId, id) + }) } + sublesson.TeacherId = teacherId + sublesson.GroupId = groupId - } else { - return nil, fmt.Errorf("responce: %s", resp.Status) - } + // Я в рот ебал парсить это расписание, потому что у преподов решили номера подгрупп пихать + // в ссылки на группу, а не в предназначенный для этого элемент + subgroup := l.Find(".schedule__groups span").First().Text() + if subgroup == " " { + subgroup = "" + } + sublesson.SubGroup = subgroup - return list, nil -} - -// Connect to ssau.ru/rasp -// Returns goquery.Document, is shedule a group shedule and its ID -func Connect(uri string, week int) (*goquery.Document, bool, int64, error) { - client := http.Client{} - - req, err := http.NewRequest("GET", fmt.Sprintf("https://ssau.ru%s&selectedWeek=%d", uri, week), nil) - if err != nil { - return nil, false, 0, err - } - req.Header.Add("User-Agent", "Mozilla/5.0") - - resp, err := client.Do(req) - if err != nil { - return nil, false, 0, err - } - - doc, err := goquery.NewDocumentFromReader(resp.Body) - if err != nil { - return nil, false, 0, err - } - - var sheduleId int64 - var isGroup bool - - sheduleId, err = strconv.ParseInt(uri[14:], 0, 64) - if err != nil { - return nil, false, 0, err - } - - isGroup = strings.Contains(uri, "group") - - return doc, isGroup, sheduleId, nil -} - -func ConnectById(id int64, isTeacher bool, week int) (*goquery.Document, error) { - uri := GenerateUri(id, isTeacher) - doc, _, _, err := Connect(uri, week) - return doc, err + place := l.Find("div.schedule__place").First().Text() + if len(place) > 2 { + place = place[1:] + } + sublesson.Place = place + sublesson.Comment = l.Find("div.schedule__comment").First().Text() + + subs = append(subs, sublesson) + }) + + return subs } diff --git a/modules/ssau_parser/shedule.go b/modules/ssau_parser/shedule.go deleted file mode 100644 index e1210d6..0000000 --- a/modules/ssau_parser/shedule.go +++ /dev/null @@ -1,189 +0,0 @@ -package ssau_parser - -import ( - "strconv" - "strings" - "time" - - "github.com/PuerkitoBio/goquery" -) - -type Lesson struct { - Begin time.Time - End time.Time - NumInShedule int - SubLessons []SubLesson -} - -type SubLesson struct { - Type string - Name string - Place string - TeacherId int64 - GroupId []int64 - Comment string - SubGroup string -} - -type Shedule struct { - IsGroup bool - SheduleId int64 - GroupName string - SpecName string - Week int - Lessons [][]Lesson -} - -func GetSheduleInfo(doc *goquery.Document, sh *Shedule) { - spec := doc.Find(".info-block__description div").First().Text() - if spec != "" { - spec = spec[1:] - } - sh.SpecName = spec - sh.GroupName = doc.Find(".info-block__title").First().Text()[1:] - -} - -// Parse goquery shedule site -func Parse(doc *goquery.Document, isGroup bool, sheduleId int64, week int) (*Shedule, error) { - var sh Shedule - GetSheduleInfo(doc, &sh) - - var raw_dates []string - doc.Find(".schedule__head-date").Each(func(i int, s *goquery.Selection) { - sh_date := s.Text() - raw_dates = append(raw_dates, sh_date) - }) - - var raw_times []string - doc.Find(".schedule__time-item").Each(func(i int, s *goquery.Selection) { - sh_time := s.Text() + "+04" - raw_times = append(raw_times, sh_time) - }) - - var lessons [][]SubLesson - doc.Find(".schedule__item:not(.schedule__head)").Each(func(i int, s *goquery.Selection) { - sl := ParseSubLesson(s, isGroup, sheduleId) - lessons = append(lessons, sl) - }) - - var shedule [][]Lesson - var firstNum int - - for t := 0; t < len(raw_times); t += 2 { - if t == 0 { - begin, err := time.Parse(" 15:04 -07", raw_times[t]) - if err != nil { - return nil, err - } - switch begin.Hour() { - case 8: - firstNum = 0 - case 9: - firstNum = 1 - case 11: - firstNum = 2 - case 13: - firstNum = 3 - case 15: - firstNum = 4 - case 17: - firstNum = 5 - case 18: - firstNum = 6 - case 20: - firstNum = 7 - } - } - var time_line []Lesson - for d, date := range raw_dates { - begin_raw := date + raw_times[t] - begin, err := time.Parse(" 02.01.2006 15:04 -07", begin_raw) - if err != nil { - return nil, err - } - end_raw := date + raw_times[t+1] - end, err := time.Parse(" 02.01.2006 15:04 -07", end_raw) - if err != nil { - return nil, err - } - idx := (len(raw_dates))*t/2 + d - lesson := Lesson{ - Begin: begin, - End: end, - NumInShedule: t/2 + firstNum, - SubLessons: lessons[idx], - } - time_line = append(time_line, lesson) - } - shedule = append(shedule, time_line) - } - sh.IsGroup = isGroup - sh.SheduleId = sheduleId - sh.Week = week - sh.Lessons = shedule - return &sh, nil -} - -var types = [4]string{"lect", "lab", "pract", "other"} - -// Parse shedule item -func ParseSubLesson(s *goquery.Selection, isGroup bool, sheduleId int64) []SubLesson { - var subs []SubLesson - s.Find(".schedule__lesson").Each(func(j int, l *goquery.Selection) { - var sublesson SubLesson - - name := l.Find("div.schedule__discipline").First() - sublesson.Name = name.Text()[1:] - l_type := name.AttrOr("class", "lesson-color-type-4") - t := strings.Split(l_type, " ") - l_type = t[len(t)-1] - type_idx, err := strconv.ParseInt(l_type[len(l_type)-1:], 0, 8) - if err != nil { - type_idx = 4 - } - sublesson.Type = types[type_idx-1] - - var teacherId int64 - var groupId []int64 - - if isGroup { - teacher := l.Find(".schedule__teacher a").AttrOr("href", "/rasp?staffId=") - teacherId, err = strconv.ParseInt(teacher[14:], 0, 64) - if err != nil { - teacherId = 0 - } - groupId = append(groupId, sheduleId) - } else { - teacherId = sheduleId - l.Find("a.schedule__group").Each(func(k int, gr *goquery.Selection) { - id, err := strconv.ParseInt(gr.AttrOr("href", "/rasp?groupId=")[14:], 0, 64) - if err != nil { - teacherId = 0 - } - groupId = append(groupId, id) - }) - } - sublesson.TeacherId = teacherId - sublesson.GroupId = groupId - - // Я в рот ебал парсить это расписание, потому что у преподов решили номера подгрупп пихать - // в ссылки на группу, а не в предназначенный для этого элемент - subgroup := l.Find(".schedule__groups span").First().Text() - if subgroup == " " { - subgroup = "" - } - sublesson.SubGroup = subgroup - - place := l.Find("div.schedule__place").First().Text() - if len(place) > 2 { - place = place[1:] - } - sublesson.Place = place - sublesson.Comment = l.Find("div.schedule__comment").First().Text() - - subs = append(subs, sublesson) - }) - - return subs -} diff --git a/modules/ssau_parser/ssau_parser_test.go b/modules/ssau_parser/ssau_parser_test.go index 0229abd..d8e202e 100644 --- a/modules/ssau_parser/ssau_parser_test.go +++ b/modules/ssau_parser/ssau_parser_test.go @@ -9,8 +9,9 @@ import ( "github.com/joho/godotenv" ) +// TODO: выдумать и прописать упоротые тесты для всего func TestFindInRasp(t *testing.T) { - list, err := FindInRasp("2305") + list, err := SearchInRasp("2305") if err != nil { t.Error(err) } @@ -18,25 +19,25 @@ func TestFindInRasp(t *testing.T) { } func TestConnect(t *testing.T) { - list, err := FindInRasp("2305") + list, err := SearchInRasp("2305") if err != nil { t.Error(err) } uri := list[0].Url - _, _, _, err = Connect(uri, 3) + _, _, _, err = DownloadShedule(uri, 3) if err != nil { t.Error(err) } } func TestParse(t *testing.T) { - list, err := FindInRasp("2108") + list, err := SearchInRasp("2108") if err != nil { t.Error(err) } week := 5 uri := list[0].Url - doc, is, gr, err := Connect(uri, week) + doc, is, gr, err := DownloadShedule(uri, week) if err != nil { t.Error(err) } diff --git a/modules/tg/handlers.go b/modules/tg/handlers.go index 1bc8fb8..c21114e 100644 --- a/modules/tg/handlers.go +++ b/modules/tg/handlers.go @@ -70,7 +70,7 @@ func (bot *Bot) Find(query string) error { var teachers []database.Teacher bot.DB.Where(builder.Like{"LastName", query}).Find(&teachers) - list, _ := ssau_parser.FindInRasp(query) + list, _ := ssau_parser.SearchInRasp(query) allGroups := groups allTeachers := teachers diff --git a/modules/tg/shedule.go b/modules/tg/shedule.go index 9d39b3f..f3c8999 100644 --- a/modules/tg/shedule.go +++ b/modules/tg/shedule.go @@ -210,7 +210,7 @@ func (bot *Bot) LoadShedule(shedules []database.ShedulesInUser, week int, isRetr dw := isRetry[0] week -= bot.Week for _, sh := range shedules { - doc, err := ssau_parser.ConnectById(sh.SheduleId, sh.IsTeacher, week+dw) + doc, err := ssau_parser.DownloadSheduleById(sh.SheduleId, sh.IsTeacher, week+dw) if err != nil { return nil, err } diff --git a/modules/tg/week_shedule.go b/modules/tg/week_shedule.go index 8df982e..551feb0 100644 --- a/modules/tg/week_shedule.go +++ b/modules/tg/week_shedule.go @@ -88,7 +88,7 @@ func (bot *Bot) GetWeekSummary(shedules []database.ShedulesInUser, dw int, isPer minDay = lesson.NumInShedule } } - var times []ssau_parser.Lesson + var times []ssau_parser.Pair var beginsSlice []time.Time var endsSlice []time.Time for b := range begins { @@ -104,7 +104,7 @@ func (bot *Bot) GetWeekSummary(shedules []database.ShedulesInUser, dw int, isPer return endsSlice[i].Before(endsSlice[j]) }) for i, b := range beginsSlice { - sh := ssau_parser.Lesson{ + sh := ssau_parser.Pair{ Begin: b, End: endsSlice[i], } @@ -230,7 +230,7 @@ var weekdays = [6]string{ "сб", } -func (bot *Bot) CreateHTMLShedule(week int, shedule [][6][]database.Lesson, dates []time.Time, times []ssau_parser.Lesson) string { +func (bot *Bot) CreateHTMLShedule(week int, shedule [][6][]database.Lesson, dates []time.Time, times []ssau_parser.Pair) string { html := head html += fmt.Sprintf("
%d неделя
\n", week-bot.Week) html += "\n\n"
Время