Подготовка перед тотальным разносом всего

This commit is contained in:
far-galaxy 2023-07-30 19:04:20 +04:00
parent b78ccaaba9
commit a82d7664de
8 changed files with 315 additions and 315 deletions

View File

@ -0,0 +1,146 @@
package ssau_parser
import (
"encoding/json"
"errors"
"fmt"
"io"
"net/http"
"net/url"
"strconv"
"strings"
"github.com/PuerkitoBio/goquery"
)
// SearchResults is the list of search results; SearchInRasp decodes the JSON
// body returned by the /rasp/search endpoint into it.
type SearchResults []struct {
// Identifier of the found entry (presumably the group or staff ID — confirm against the API).
Id int64
// Link to the found schedule; the tests pass it to DownloadShedule as the URI.
Url string
// Text of the found entry (presumably the group number or teacher name — confirm).
Text string
}
// Page is a downloaded schedule page together with the service data that
// describes which schedule and week it belongs to.
type Page struct {
// Numeric ID parsed from the request URI.
ID int64
// True when the request URI contains "group", i.e. the page is a group schedule.
IsGroup bool
// Week number that was requested.
Week int
// Parsed HTML document of the page.
Doc *goquery.Document
}
// headURL is the base address of the main site (production or test);
// every request path in this file is appended to it.
var headURL = "https://ssau.ru"
// SearchInRasp searches for a group or teacher schedule through
// ssau.ru/rasp/search.
//
// It first loads the /rasp page to collect the cookies and the CSRF token,
// then posts the query as a URL-encoded form to /rasp/search with those
// cookies and the token attached.
//
// It returns the decoded search results. An error is returned when a request
// cannot be built or sent, when the page carries no csrf-token meta tag, when
// the search endpoint answers with a status other than 200, or when the body
// is not valid JSON for SearchResults.
func SearchInRasp(query string) (SearchResults, error) {
	client := http.Client{}

	// First visit the site itself and obtain the tokens so that we are taken
	// for a human.
	req, err := http.NewRequest("GET", headURL+"/rasp", nil)
	if err != nil {
		return nil, err
	}
	req.Header.Add("User-Agent", "Mozilla/5.0")
	pageResp, err := client.Do(req)
	if err != nil {
		return nil, err
	}
	// The body must be closed, otherwise the underlying connection leaks.
	defer pageResp.Body.Close()

	doc, err := goquery.NewDocumentFromReader(pageResp.Body)
	if err != nil {
		return nil, err
	}
	csrf, exists := doc.Find("meta[name='csrf-token']").Attr("content")
	if !exists {
		return nil, errors.New("missed csrf")
	}

	form := url.Values{}
	form.Add("text", query)

	// Now the API-like endpoint can be called.
	req, err = http.NewRequest("POST", headURL+"/rasp/search", strings.NewReader(form.Encode()))
	if err != nil {
		return nil, err
	}
	// Replay the cookies issued by the first response.
	for _, cookie := range pageResp.Cookies() {
		req.AddCookie(cookie)
	}
	req.Header.Add("Content-Type", "application/x-www-form-urlencoded")
	req.Header.Add("User-Agent", "Mozilla/5.0")
	req.Header.Add("Accept", "application/json")
	req.Header.Add("X-CSRF-TOKEN", csrf)

	searchResp, err := client.Do(req)
	if err != nil {
		return nil, err
	}
	defer searchResp.Body.Close()

	if searchResp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("responce: %s", searchResp.Status)
	}

	body, err := io.ReadAll(searchResp.Body)
	if err != nil {
		return nil, err
	}
	var list SearchResults
	if err := json.Unmarshal(body, &list); err != nil {
		return nil, err
	}
	return list, nil
}
// DownloadShedule downloads a schedule page from ssau.ru/rasp by URI and week
// number (within the semester).
//
// uri is expected to look like "/rasp?groupId=<id>" or "/rasp?staffId=<id>":
// everything after the first 14 bytes is parsed as the numeric ID, and the
// page is treated as a group schedule when uri contains "group".
//
// It returns the Page with ID, IsGroup, Week and the parsed document. An error
// is returned when uri is too short to hold an ID, when the ID is not a valid
// integer, when the request cannot be built or sent, or when the response
// cannot be parsed as HTML.
func DownloadShedule(uri string, week int) (Page, error) {
	// Both "/rasp?groupId=" and "/rasp?staffId=" are 14 bytes long.
	const idOffset = 14

	var page Page
	var err error

	// Slicing a shorter string would panic, so report it as an error instead.
	if len(uri) < idOffset {
		return page, fmt.Errorf("malformed schedule uri %q", uri)
	}
	page.ID, err = strconv.ParseInt(uri[idOffset:], 0, 64)
	if err != nil {
		return page, err
	}
	page.IsGroup = strings.Contains(uri, "group")
	page.Week = week

	client := http.Client{}
	req, err := http.NewRequest("GET", fmt.Sprintf("%s%s&selectedWeek=%d", headURL, uri, week), nil)
	if err != nil {
		return page, err
	}
	req.Header.Add("User-Agent", "Mozilla/5.0")
	resp, err := client.Do(req)
	if err != nil {
		return page, err
	}
	// The body must be closed, otherwise the underlying connection leaks.
	defer resp.Body.Close()

	page.Doc, err = goquery.NewDocumentFromReader(resp.Body)
	if err != nil {
		return page, err
	}
	return page, nil
}
// DownloadSheduleById downloads a schedule page from ssau.ru/rasp by ID and
// week number (within the semester). isGroup selects between a group URI
// (true) and a staff URI (false), as built by GenerateUri.
func DownloadSheduleById(id int64, isGroup bool, week int) (Page, error) {
	return DownloadShedule(GenerateUri(id, isGroup), week)
}
// GenerateUri builds the schedule URI for an ID: "/rasp?groupId=<id>" when
// isGroup is true, "/rasp?staffId=<id>" otherwise.
func GenerateUri(id int64, isGroup bool) string {
	param := "staffId"
	if isGroup {
		param = "groupId"
	}
	return fmt.Sprintf("/rasp?%s=%d", param, id)
}

View File

@ -1,7 +1,6 @@
package ssau_parser
import (
"fmt"
"log"
"strings"
@ -9,7 +8,7 @@ import (
"xorm.io/xorm"
)
func UploadShedule(db *xorm.Engine, sh Shedule) error {
func UploadShedule(db *xorm.Engine, sh WeekShedule) error {
err := addGroupOrTeacher(db, sh)
if err != nil {
return err
@ -19,7 +18,7 @@ func UploadShedule(db *xorm.Engine, sh Shedule) error {
for _, line := range sh.Lessons {
for _, lesson := range line {
var pair database.Lesson
for _, subLesson := range lesson.SubLessons {
for _, subLesson := range lesson.Lessons {
pair = database.Lesson{
Begin: lesson.Begin,
End: lesson.End,
@ -35,11 +34,11 @@ func UploadShedule(db *xorm.Engine, sh Shedule) error {
if !exists && subLesson.TeacherId != 0 {
uri := GenerateUri(subLesson.TeacherId, true)
doc, _, _, err := Connect(uri, sh.Week)
doc, _, _, err := DownloadShedule(uri, sh.Week)
if err != nil {
return err
}
var gr Shedule
var gr WeekShedule
gr.IsGroup = false
gr.SheduleId = subLesson.TeacherId
GetSheduleInfo(doc, &gr)
@ -56,11 +55,11 @@ func UploadShedule(db *xorm.Engine, sh Shedule) error {
if !exists {
uri := GenerateUri(groupId, false)
doc, _, _, err := Connect(uri, sh.Week)
doc, _, _, err := DownloadShedule(uri, sh.Week)
if err != nil {
return err
}
var gr Shedule
var gr WeekShedule
gr.IsGroup = true
gr.SheduleId = groupId
GetSheduleInfo(doc, &gr)
@ -90,16 +89,6 @@ func UploadShedule(db *xorm.Engine, sh Shedule) error {
return nil
}
func GenerateUri(id int64, isTeacher bool) string {
var uri string
if isTeacher {
uri = fmt.Sprintf("/rasp?staffId=%d", id)
} else {
uri = fmt.Sprintf("/rasp?groupId=%d", id)
}
return uri
}
func isGroupExists(db *xorm.Engine, groupId int64) (bool, error) {
var exists []database.Group
err := db.Find(&exists, database.Group{GroupId: groupId})
@ -120,7 +109,7 @@ func isTeacherExists(db *xorm.Engine, teacherId int64) (bool, error) {
return len(exists) == 1, nil
}
func addGroupOrTeacher(db *xorm.Engine, sh Shedule) error {
func addGroupOrTeacher(db *xorm.Engine, sh WeekShedule) error {
if sh.IsGroup {
exists, err := isGroupExists(db, sh.SheduleId)
if err != nil {

View File

@ -1,126 +1,179 @@
package ssau_parser
import (
"encoding/json"
"errors"
"fmt"
"io"
"log"
"net/http"
"net/url"
"strconv"
"strings"
"time"
"github.com/PuerkitoBio/goquery"
)
type RaspList struct {
Items []RaspItems
type Pair struct {
Begin time.Time
End time.Time
NumInShedule int
Lessons []Lesson
}
type RaspItems []struct {
Id int64
Url string
Text string
type Lesson struct {
Type string
Name string
Place string
TeacherId int64
GroupId []int64
Comment string
SubGroup string
}
func FindInRasp(query string) (RaspItems, error) {
client := http.Client{}
type WeekShedule struct {
IsGroup bool
SheduleId int64
GroupName string
SpecName string
Week int
Lessons [][]Pair
}
req, err := http.NewRequest("GET", "https://ssau.ru/rasp", nil)
if err != nil {
return nil, err
// Получить полный номер группы и название специальности
// TODO: проверить, как это с преподами работает
func GetSheduleInfo(doc *goquery.Document, sh *WeekShedule) {
spec := doc.Find(".info-block__description div").First().Text()
if spec != "" {
spec = spec[1:]
}
req.Header.Add("User-Agent", "Mozilla/5.0")
sh.SpecName = spec
sh.GroupName = strings.TrimSpace(doc.Find(".info-block__title").First().Text())
resp, err := client.Do(req)
if err != nil {
return nil, err
}
}
doc, err := goquery.NewDocumentFromReader(resp.Body)
if err != nil {
return nil, err
}
csrf, exists := doc.Find("meta[name='csrf-token']").Attr("content")
if !exists {
return nil, errors.New("missed csrf")
}
// Соотнесение часа начала пары с его порядковым номером
var hourMap = map[int]int{8: 0, 9: 1, 11: 2, 13: 3, 15: 4, 17: 5, 18: 6, 20: 7}
parm := url.Values{}
parm.Add("text", query)
req, err = http.NewRequest("POST", "https://ssau.ru/rasp/search", strings.NewReader(parm.Encode()))
if err != nil {
return nil, err
}
// Парсинг страницы с расписанием
func Parse(p Page) (*WeekShedule, error) {
var sh WeekShedule
doc := p.Doc
GetSheduleInfo(doc, &sh)
for _, cookie := range resp.Cookies() {
req.AddCookie(cookie)
}
req.Header.Add("Content-Type", "application/x-www-form-urlencoded")
req.Header.Add("User-Agent", "Mozilla/5.0")
req.Header.Add("Accept", "application/json")
req.Header.Add("X-CSRF-TOKEN", csrf)
var raw_dates []string
doc.Find(".schedule__head-date").Each(func(i int, s *goquery.Selection) {
sh_date := s.Text()
raw_dates = append(raw_dates, sh_date)
})
resp, err = client.Do(req)
if err != nil {
return nil, err
}
var raw_times []string
doc.Find(".schedule__time-item").Each(func(i int, s *goquery.Selection) {
sh_time := s.Text() + "+04"
raw_times = append(raw_times, sh_time)
})
var list RaspItems
if resp.StatusCode == 200 {
body, err := io.ReadAll(resp.Body)
var lessons [][]Lesson
doc.Find(".schedule__item:not(.schedule__head)").Each(func(i int, s *goquery.Selection) {
sl := ParseSubLesson(s, p.IsGroup, p.ID)
lessons = append(lessons, sl)
})
var shedule [][]Pair
var firstNum int
for t := 0; t < len(raw_times); t += 2 {
if t == 0 {
begin, err := time.Parse(" 15:04 -07", raw_times[t])
if err != nil {
return nil, err
}
firstNum, _ = hourMap[begin.Hour()]
}
var time_line []Pair
for d, date := range raw_dates {
begin_raw := date + raw_times[t]
begin, err := time.Parse(" 02.01.2006 15:04 -07", begin_raw)
if err != nil {
return nil, err
}
end_raw := date + raw_times[t+1]
end, err := time.Parse(" 02.01.2006 15:04 -07", end_raw)
if err != nil {
return nil, err
}
idx := (len(raw_dates))*t/2 + d
lesson := Pair{
Begin: begin,
End: end,
NumInShedule: t/2 + firstNum,
Lessons: lessons[idx],
}
time_line = append(time_line, lesson)
}
shedule = append(shedule, time_line)
}
sh.IsGroup = p.IsGroup
sh.SheduleId = p.ID
sh.Week = p.Week
sh.Lessons = shedule
return &sh, nil
}
var types = [4]string{"lect", "lab", "pract", "other"}
// Парсинг занятия
func ParseSubLesson(s *goquery.Selection, isGroup bool, sheduleId int64) []Lesson {
var subs []Lesson
s.Find(".schedule__lesson").Each(func(j int, l *goquery.Selection) {
var sublesson Lesson
name := l.Find("div.schedule__discipline").First()
sublesson.Name = name.Text()[1:]
l_type := name.AttrOr("class", "lesson-color-type-4")
t := strings.Split(l_type, " ")
l_type = t[len(t)-1]
type_idx, err := strconv.ParseInt(l_type[len(l_type)-1:], 0, 8)
if err != nil {
log.Fatal(err)
type_idx = 4
}
sublesson.Type = types[type_idx-1]
if err := json.Unmarshal(body, &list); err != nil {
return nil, err
var teacherId int64
var groupId []int64
if isGroup {
teacher := l.Find(".schedule__teacher a").AttrOr("href", "/rasp?staffId=")
teacherId, err = strconv.ParseInt(teacher[14:], 0, 64)
if err != nil {
teacherId = 0
}
groupId = append(groupId, sheduleId)
} else {
teacherId = sheduleId
l.Find("a.schedule__group").Each(func(k int, gr *goquery.Selection) {
id, err := strconv.ParseInt(gr.AttrOr("href", "/rasp?groupId=")[14:], 0, 64)
if err != nil {
teacherId = 0
}
groupId = append(groupId, id)
})
}
sublesson.TeacherId = teacherId
sublesson.GroupId = groupId
} else {
return nil, fmt.Errorf("responce: %s", resp.Status)
}
// Я в рот ебал парсить это расписание, потому что у преподов решили номера подгрупп пихать
// в ссылки на группу, а не в предназначенный для этого элемент
subgroup := l.Find(".schedule__groups span").First().Text()
if subgroup == " " {
subgroup = ""
}
sublesson.SubGroup = subgroup
return list, nil
}
// Connect to ssau.ru/rasp
// Returns goquery.Document, is shedule a group shedule and its ID
func Connect(uri string, week int) (*goquery.Document, bool, int64, error) {
client := http.Client{}
req, err := http.NewRequest("GET", fmt.Sprintf("https://ssau.ru%s&selectedWeek=%d", uri, week), nil)
if err != nil {
return nil, false, 0, err
}
req.Header.Add("User-Agent", "Mozilla/5.0")
resp, err := client.Do(req)
if err != nil {
return nil, false, 0, err
}
doc, err := goquery.NewDocumentFromReader(resp.Body)
if err != nil {
return nil, false, 0, err
}
var sheduleId int64
var isGroup bool
sheduleId, err = strconv.ParseInt(uri[14:], 0, 64)
if err != nil {
return nil, false, 0, err
}
isGroup = strings.Contains(uri, "group")
return doc, isGroup, sheduleId, nil
}
func ConnectById(id int64, isTeacher bool, week int) (*goquery.Document, error) {
uri := GenerateUri(id, isTeacher)
doc, _, _, err := Connect(uri, week)
return doc, err
place := l.Find("div.schedule__place").First().Text()
if len(place) > 2 {
place = place[1:]
}
sublesson.Place = place
sublesson.Comment = l.Find("div.schedule__comment").First().Text()
subs = append(subs, sublesson)
})
return subs
}

View File

@ -1,189 +0,0 @@
package ssau_parser
import (
"strconv"
"strings"
"time"
"github.com/PuerkitoBio/goquery"
)
// Lesson is one cell of the schedule grid: a time slot on a particular day
// together with everything scheduled in it.
type Lesson struct {
// Start of the slot, built by Parse from the column date and the slot's first time item.
Begin time.Time
// End of the slot, built by Parse from the column date and the slot's second time item.
End time.Time
// Ordinal number of the slot; Parse sets it to the row index plus the number of the first row.
NumInShedule int
// Lessons found in this cell (may be empty).
SubLessons []SubLesson
}
// SubLesson is a single lesson entry inside a schedule cell, as filled in by
// ParseSubLesson.
type SubLesson struct {
// One of the values of the types array: "lect", "lab", "pract" or "other".
Type string
// Discipline text.
Name string
// Text of the place element.
Place string
// Staff ID taken from the teacher link, or the schedule ID on a teacher's page; 0 when it could not be parsed.
TeacherId int64
// Group IDs: the schedule ID on a group page, or the IDs from the group links on a teacher's page.
GroupId []int64
// Text of the comment element.
Comment string
// Text of the first span inside the groups element; a single space is stored as "".
SubGroup string
}
// Shedule is the parsed schedule of one group or teacher for one week.
type Shedule struct {
// Whether this is a group schedule (as passed to Parse).
IsGroup bool
// ID of the group or teacher the schedule belongs to.
SheduleId int64
// Text of the page's info-block title, filled in by GetSheduleInfo.
GroupName string
// Text of the page's info-block description, filled in by GetSheduleInfo.
SpecName string
// Week number (as passed to Parse).
Week int
// Schedule grid built by Parse: the outer index is the time slot, the inner index is the date column.
Lessons [][]Lesson
}
// GetSheduleInfo fills sh.SpecName and sh.GroupName from the info block of a
// schedule page.
//
// Both values are taken from the first matching element with the first byte
// of the text dropped. A missing or empty element yields an empty string
// instead of a slice-bounds panic.
func GetSheduleInfo(doc *goquery.Document, sh *Shedule) {
	spec := doc.Find(".info-block__description div").First().Text()
	if spec != "" {
		spec = spec[1:]
	}
	sh.SpecName = spec

	// Guard the title the same way as the description: slicing an empty
	// string with [1:] would panic.
	title := doc.Find(".info-block__title").First().Text()
	if title != "" {
		title = title[1:]
	}
	sh.GroupName = title
}
// Parse builds a Shedule from a downloaded schedule page.
//
// It reads the header dates, the time items and the lesson cells from doc,
// combines them into a grid of Lesson values (one row per time slot, one
// column per date) and stores isGroup, sheduleId and week in the result.
// An error is returned when a date/time string does not match the expected
// layout.
func Parse(doc *goquery.Document, isGroup bool, sheduleId int64, week int) (*Shedule, error) {
var sh Shedule
GetSheduleInfo(doc, &sh)
// Column headers: one date string per day shown on the page.
var raw_dates []string
doc.Find(".schedule__head-date").Each(func(i int, s *goquery.Selection) {
sh_date := s.Text()
raw_dates = append(raw_dates, sh_date)
})
// Time items; a fixed "+04" offset is appended so the "-07" part of the layouts below can parse a zone.
var raw_times []string
doc.Find(".schedule__time-item").Each(func(i int, s *goquery.Selection) {
sh_time := s.Text() + "+04"
raw_times = append(raw_times, sh_time)
})
// All grid cells except the header ones, in document order.
var lessons [][]SubLesson
doc.Find(".schedule__item:not(.schedule__head)").Each(func(i int, s *goquery.Selection) {
sl := ParseSubLesson(s, isGroup, sheduleId)
lessons = append(lessons, sl)
})
var shedule [][]Lesson
var firstNum int
// Time items are consumed in pairs: raw_times[t] is the begin and raw_times[t+1] the end of a slot.
// NOTE(review): an odd number of time items would make raw_times[t+1] index out of range — confirm the page always has pairs.
for t := 0; t < len(raw_times); t += 2 {
if t == 0 {
begin, err := time.Parse(" 15:04 -07", raw_times[t])
if err != nil {
return nil, err
}
// Map the starting hour of the first slot to its ordinal number; any other hour leaves firstNum at 0.
switch begin.Hour() {
case 8:
firstNum = 0
case 9:
firstNum = 1
case 11:
firstNum = 2
case 13:
firstNum = 3
case 15:
firstNum = 4
case 17:
firstNum = 5
case 18:
firstNum = 6
case 20:
firstNum = 7
}
}
var time_line []Lesson
for d, date := range raw_dates {
// The layouts start with a space, so the scraped date text presumably has a leading space — confirm against the page.
begin_raw := date + raw_times[t]
begin, err := time.Parse(" 02.01.2006 15:04 -07", begin_raw)
if err != nil {
return nil, err
}
end_raw := date + raw_times[t+1]
end, err := time.Parse(" 02.01.2006 15:04 -07", end_raw)
if err != nil {
return nil, err
}
// Cells are addressed as a flat row-major list: row t/2 (time slot), column d (date).
// NOTE(review): assumes the number of cells equals slots × dates; fewer cells would make lessons[idx] panic.
idx := (len(raw_dates))*t/2 + d
lesson := Lesson{
Begin: begin,
End: end,
NumInShedule: t/2 + firstNum,
SubLessons: lessons[idx],
}
time_line = append(time_line, lesson)
}
shedule = append(shedule, time_line)
}
sh.IsGroup = isGroup
sh.SheduleId = sheduleId
sh.Week = week
sh.Lessons = shedule
return &sh, nil
}
// types lists the lesson kinds; ParseSubLesson indexes it with the trailing
// digit of the discipline's last CSS class minus one.
var types = [4]string{"lect", "lab", "pract", "other"}
// ParseSubLesson parses one schedule cell and returns every lesson found in
// it (nil when the cell is empty).
//
// s is the cell selection. isGroup tells whether the page is a group
// schedule, and sheduleId is the ID of that group or teacher. On a group page
// the teacher ID is read from the teacher link and the group list holds only
// sheduleId. On a teacher page the teacher ID is sheduleId and the group IDs
// are read from the group links.
//
// Unexpected markup (empty discipline text, an unknown type digit, a link
// shorter than the ID prefix) falls back to defaults instead of panicking.
func ParseSubLesson(s *goquery.Selection, isGroup bool, sheduleId int64) []SubLesson {
	// Both "/rasp?staffId=" and "/rasp?groupId=" are 14 bytes long.
	const idOffset = 14

	var subs []SubLesson
	s.Find(".schedule__lesson").Each(func(j int, l *goquery.Selection) {
		var sublesson SubLesson

		name := l.Find("div.schedule__discipline").First()
		// Drop the first byte of the discipline text; an empty text stays empty.
		if text := name.Text(); text != "" {
			sublesson.Name = text[1:]
		}

		// The lesson type is the trailing digit of the last CSS class
		// (e.g. "lesson-color-type-2"). Anything that is not 1..len(types)
		// falls back to the last entry, "other".
		classes := strings.Split(name.AttrOr("class", "lesson-color-type-4"), " ")
		lastClass := classes[len(classes)-1]
		typeIdx := int64(len(types))
		if lastClass != "" {
			n, err := strconv.ParseInt(lastClass[len(lastClass)-1:], 0, 8)
			if err == nil && n >= 1 && n <= int64(len(types)) {
				typeIdx = n
			}
		}
		sublesson.Type = types[typeIdx-1]

		var teacherId int64
		var groupId []int64
		if isGroup {
			teacher := l.Find(".schedule__teacher a").AttrOr("href", "/rasp?staffId=")
			// A missing, too short or non-numeric link leaves the teacher ID at 0.
			if len(teacher) >= idOffset {
				if id, err := strconv.ParseInt(teacher[idOffset:], 0, 64); err == nil {
					teacherId = id
				}
			}
			groupId = append(groupId, sheduleId)
		} else {
			teacherId = sheduleId
			l.Find("a.schedule__group").Each(func(k int, gr *goquery.Selection) {
				href := gr.AttrOr("href", "/rasp?groupId=")
				var id int64
				parsed := false
				if len(href) >= idOffset {
					n, err := strconv.ParseInt(href[idOffset:], 0, 64)
					id, parsed = n, err == nil
				}
				if !parsed {
					// NOTE(review): a bad group link resets the teacher ID and
					// the ID is still appended; kept as in the original, but
					// this looks like a copy-paste from the branch above.
					teacherId = 0
				}
				groupId = append(groupId, id)
			})
		}
		sublesson.TeacherId = teacherId
		sublesson.GroupId = groupId

		// On teacher pages the subgroup numbers are placed inside the group
		// links rather than in the element intended for them, so this span
		// may not carry the subgroup there.
		subgroup := l.Find(".schedule__groups span").First().Text()
		if subgroup == " " {
			subgroup = ""
		}
		sublesson.SubGroup = subgroup

		place := l.Find("div.schedule__place").First().Text()
		if len(place) > 2 {
			place = place[1:]
		}
		sublesson.Place = place

		sublesson.Comment = l.Find("div.schedule__comment").First().Text()

		subs = append(subs, sublesson)
	})
	return subs
}

View File

@ -9,8 +9,9 @@ import (
"github.com/joho/godotenv"
)
// TODO: выдумать и прописать упоротые тесты для всего
func TestFindInRasp(t *testing.T) {
list, err := FindInRasp("2305")
list, err := SearchInRasp("2305")
if err != nil {
t.Error(err)
}
@ -18,25 +19,25 @@ func TestFindInRasp(t *testing.T) {
}
func TestConnect(t *testing.T) {
list, err := FindInRasp("2305")
list, err := SearchInRasp("2305")
if err != nil {
t.Error(err)
}
uri := list[0].Url
_, _, _, err = Connect(uri, 3)
_, _, _, err = DownloadShedule(uri, 3)
if err != nil {
t.Error(err)
}
}
func TestParse(t *testing.T) {
list, err := FindInRasp("2108")
list, err := SearchInRasp("2108")
if err != nil {
t.Error(err)
}
week := 5
uri := list[0].Url
doc, is, gr, err := Connect(uri, week)
doc, is, gr, err := DownloadShedule(uri, week)
if err != nil {
t.Error(err)
}

View File

@ -70,7 +70,7 @@ func (bot *Bot) Find(query string) error {
var teachers []database.Teacher
bot.DB.Where(builder.Like{"LastName", query}).Find(&teachers)
list, _ := ssau_parser.FindInRasp(query)
list, _ := ssau_parser.SearchInRasp(query)
allGroups := groups
allTeachers := teachers

View File

@ -210,7 +210,7 @@ func (bot *Bot) LoadShedule(shedules []database.ShedulesInUser, week int, isRetr
dw := isRetry[0]
week -= bot.Week
for _, sh := range shedules {
doc, err := ssau_parser.ConnectById(sh.SheduleId, sh.IsTeacher, week+dw)
doc, err := ssau_parser.DownloadSheduleById(sh.SheduleId, sh.IsTeacher, week+dw)
if err != nil {
return nil, err
}

View File

@ -88,7 +88,7 @@ func (bot *Bot) GetWeekSummary(shedules []database.ShedulesInUser, dw int, isPer
minDay = lesson.NumInShedule
}
}
var times []ssau_parser.Lesson
var times []ssau_parser.Pair
var beginsSlice []time.Time
var endsSlice []time.Time
for b := range begins {
@ -104,7 +104,7 @@ func (bot *Bot) GetWeekSummary(shedules []database.ShedulesInUser, dw int, isPer
return endsSlice[i].Before(endsSlice[j])
})
for i, b := range beginsSlice {
sh := ssau_parser.Lesson{
sh := ssau_parser.Pair{
Begin: b,
End: endsSlice[i],
}
@ -230,7 +230,7 @@ var weekdays = [6]string{
"сб",
}
func (bot *Bot) CreateHTMLShedule(week int, shedule [][6][]database.Lesson, dates []time.Time, times []ssau_parser.Lesson) string {
func (bot *Bot) CreateHTMLShedule(week int, shedule [][6][]database.Lesson, dates []time.Time, times []ssau_parser.Pair) string {
html := head
html += fmt.Sprintf("<div class=\"note\"><div id=\"week\">%d неделя</div></div>\n", week-bot.Week)
html += "<table class=\"rasp\">\n<tr><th class=\"head\" style=\"width: 4rem\">Время</th>\n"