Добавлено: парсинг расписания
This commit is contained in:
parent
47d96fb1e4
commit
804b767dbb
|
@ -2,11 +2,14 @@ package ssau_parser
|
|||
|
||||
import (
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"log"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/PuerkitoBio/goquery"
|
||||
)
|
||||
|
@ -21,34 +24,34 @@ type RaspItems []struct {
|
|||
Text string
|
||||
}
|
||||
|
||||
func FindInRasp(query string) RaspItems {
|
||||
func FindInRasp(query string) (RaspItems, error) {
|
||||
client := http.Client{}
|
||||
|
||||
req, err := http.NewRequest("GET", "https://ssau.ru/rasp", nil)
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
return nil, err
|
||||
}
|
||||
req.Header.Add("User-Agent", "Mozilla/5.0")
|
||||
|
||||
resp, err := client.Do(req)
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
return nil, err
|
||||
}
|
||||
|
||||
doc, err := goquery.NewDocumentFromReader(resp.Body)
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
return nil, err
|
||||
}
|
||||
csrf, exists := doc.Find("meta[name='csrf-token']").Attr("content")
|
||||
if !exists {
|
||||
log.Fatal("Missed CSRF")
|
||||
return nil, errors.New("missed csrf")
|
||||
}
|
||||
|
||||
parm := url.Values{}
|
||||
parm.Add("text", query)
|
||||
req, err = http.NewRequest("POST", "https://ssau.ru/rasp/search", strings.NewReader(parm.Encode()))
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
return nil, err
|
||||
}
|
||||
|
||||
for _, cookie := range resp.Cookies() {
|
||||
|
@ -61,7 +64,7 @@ func FindInRasp(query string) RaspItems {
|
|||
|
||||
resp, err = client.Do(req)
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var list RaspItems
|
||||
|
@ -72,12 +75,119 @@ func FindInRasp(query string) RaspItems {
|
|||
}
|
||||
|
||||
if err := json.Unmarshal(body, &list); err != nil {
|
||||
log.Fatal(err)
|
||||
return nil, err
|
||||
}
|
||||
|
||||
} else {
|
||||
log.Fatal("Responce: " + resp.Status)
|
||||
return nil, fmt.Errorf("Responce: %s", resp.Status)
|
||||
}
|
||||
|
||||
return list
|
||||
return list, nil
|
||||
}
|
||||
|
||||
type Times struct {
|
||||
Begin time.Time
|
||||
End time.Time
|
||||
}
|
||||
|
||||
func Connect(uri string, week int) (*goquery.Document, error) {
|
||||
client := http.Client{}
|
||||
|
||||
req, err := http.NewRequest("GET", fmt.Sprintf("https://ssau.ru%s&selectedWeek=%d", uri, week), nil)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
req.Header.Add("User-Agent", "Mozilla/5.0")
|
||||
|
||||
resp, err := client.Do(req)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
doc, err := goquery.NewDocumentFromReader(resp.Body)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return doc, nil
|
||||
}
|
||||
|
||||
type Lesson struct {
|
||||
Begin time.Time
|
||||
End time.Time
|
||||
Name string
|
||||
}
|
||||
|
||||
type Shedule struct {
|
||||
SpecName string
|
||||
Week int
|
||||
Lessons [][]Lesson
|
||||
}
|
||||
|
||||
func Parse(doc *goquery.Document) (*Shedule, error) {
|
||||
spec := doc.Find(".info-block__description div").First().Text()[1:]
|
||||
log.Println(spec)
|
||||
|
||||
var raw_dates []string
|
||||
doc.Find(".schedule__head-date").Each(func(i int, s *goquery.Selection) {
|
||||
sh_date := s.Text()
|
||||
raw_dates = append(raw_dates, sh_date)
|
||||
})
|
||||
|
||||
var raw_times []string
|
||||
doc.Find(".schedule__time-item").Each(func(i int, s *goquery.Selection) {
|
||||
sh_time := s.Text() + "+04"
|
||||
raw_times = append(raw_times, sh_time)
|
||||
})
|
||||
|
||||
var lessons []string
|
||||
doc.Find(".schedule__item:not(.schedule__head)").Each(func(i int, s *goquery.Selection) {
|
||||
lesson := s.Text()
|
||||
lessons = append(lessons, lesson)
|
||||
})
|
||||
|
||||
var shedule [][]Lesson
|
||||
|
||||
for t := 0; t < len(raw_times); t += 2 {
|
||||
var time_line []Lesson
|
||||
for d, date := range raw_dates {
|
||||
begin_raw := date + raw_times[t]
|
||||
begin, err := time.Parse(" 02.01.2006 15:04 -07", begin_raw)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
end_raw := date + raw_times[t+1]
|
||||
end, err := time.Parse(" 02.01.2006 15:04 -07", end_raw)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
idx := (len(raw_times)-2)*t/2 + d
|
||||
lesson := Lesson{
|
||||
Begin: begin,
|
||||
End: end,
|
||||
Name: lessons[idx],
|
||||
}
|
||||
time_line = append(time_line, lesson)
|
||||
}
|
||||
shedule = append(shedule, time_line)
|
||||
}
|
||||
return &Shedule{SpecName: spec, Lessons: shedule}, nil
|
||||
}
|
||||
|
||||
/*
|
||||
type Lesson struct {
|
||||
Type string
|
||||
Name string
|
||||
Place string
|
||||
TeacherID int64
|
||||
Comment string
|
||||
}
|
||||
|
||||
func parseLesson(l *goquery.Selection) {
|
||||
var lesson Lesson
|
||||
d, _ := l.Find("div.schedule__discipline").Attr("class")
|
||||
t := strings.Split(d, " ")
|
||||
lesson.Type = t[len(t)-1]
|
||||
lesson.Name = l.Find("div.schedule__discipline").First().Text()
|
||||
}
|
||||
*/
|
||||
|
|
|
@ -6,6 +6,34 @@ import (
|
|||
)
|
||||
|
||||
func TestFindInRasp(t *testing.T) {
|
||||
list := FindInRasp("2305")
|
||||
list, err := FindInRasp("2305")
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
log.Println(list)
|
||||
}
|
||||
|
||||
func TestConnect(t *testing.T) {
|
||||
list, err := FindInRasp("2305")
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
uri := list[0].Url
|
||||
_, err = Connect(uri, 3)
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParse(t *testing.T) {
|
||||
list, err := FindInRasp("2405")
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
uri := list[0].Url
|
||||
doc, err := Connect(uri, 3)
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
Parse(doc)
|
||||
}
|
||||
|
|
Reference in New Issue