Skip to content

Commit

Permalink
feat: change scan pattern
Browse files Browse the repository at this point in the history
  • Loading branch information
phuchptty committed Jan 17, 2024
1 parent 1a1b2f2 commit a87453b
Show file tree
Hide file tree
Showing 3 changed files with 21 additions and 4 deletions.
2 changes: 1 addition & 1 deletion modules/exporter/tsv.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ func TsvExport(subjectScoreMap map[string]SubjectStudentCore, output string) {
for _, value := range subjectScoreMap {
_, subjectExistYet := subjectsDataMap[value.SubjectCode]

if !subjectExistYet {
if !subjectExistYet && value.SubjectCode != "" {
subjectInfo := SubjectInfo{
SubjectName: value.SubjectName,
SubjectCode: value.SubjectCode,
Expand Down
18 changes: 15 additions & 3 deletions modules/scanner/scanner.go
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ func scanFile(input string) map[string]exporter.SubjectStudentCore {
}

// Find the items to scan: <p> tags containing the "header" text or "Số TC:", and <table> tags.
allSelection := doc.Find("p:contains(\"Số TC:\"), table")
allSelection := doc.Find("p:contains(\"CỘNG HÒA XÃ HỘI CHỦ NGHĨA VIỆT NAM\"), p:contains(\"Số TC:\"), table")

// Global map to store all scores and subject info
var ssScores = make(map[string]exporter.SubjectStudentCore)
Expand All @@ -113,12 +113,24 @@ func scanFile(input string) map[string]exporter.SubjectStudentCore {
s := allSelection.Eq(i)

// Check if the current selection is <p> tag
if s.Is("p") {
// Before each table segment there is a "header" <p> tag and a <p> tag with the subject data.
// The subject data <p> tag usually comes right after the "header" tag.
if s.Is("p") && strings.Contains(strings.ToUpper(s.Text()), "CỘNG HÒA XÃ HỘI CHỦ NGHĨA VIỆT NAM") {
// The subject data is normally in the next <p> tag.
s1 := allSelection.Eq(i + 1)

// In some cases the subject data does not follow the "header" tag, or is missing entirely.
// Skip this element and keep using the previous subject code.
if !strings.Contains(s1.Text(), "Số TC:") {
continue
}

// Split the string by "Số TC:"
parts := strings.Split(s.Text(), "Số TC:")
parts := strings.Split(s1.Text(), "Số TC:")

// The first part contains the course name, split it by ":"
courseParts := strings.Split(parts[0], ":")

subjectName := utils.CleanSubjectName(strings.TrimSpace(courseParts[1]))

// The second part contains the course credit and code, split it by "Mã học phần:"
Expand Down
5 changes: 5 additions & 0 deletions utils/scanner.go
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,11 @@ func CleanSubjectName(value string) string {

// Remove last part
valueParts := strings.Split(value, "-")

if len(valueParts) == 1 {
return value
}

value = strings.TrimSpace(strings.Join(valueParts[:len(valueParts)-1], " "))

return value
Expand Down

0 comments on commit a87453b

Please sign in to comment.