Skip to content

Instantly share code, notes, and snippets.

@mhakrook
Last active May 21, 2020 08:43
Show Gist options
  • Save mhakrook/c1b715619e39195b574b53154242fd73 to your computer and use it in GitHub Desktop.
Save mhakrook/c1b715619e39195b574b53154242fd73 to your computer and use it in GitHub Desktop.
/**
 * Scrapes the DLTV broadcast schedule into the target spreadsheet, creating
 * one sheet per education level.
 *
 * Steps:
 *  1. Delete leftover sheets whose A1 cell is empty (see removeEmptySheets_).
 *  2. Read the level names from the schedule index page.
 *  3. For every level, insert a sheet named after the level, write the
 *     time-slot header row and append one row per calendar day of May and
 *     June 2020 for which the site lists at least one time.
 *
 * A level whose sheet cannot be created or filled (for example because a
 * sheet with that name already exists) is logged and skipped so the other
 * levels are still processed.
 *
 * @throws {Error} If no level name can be extracted from the index page.
 */
function scraper() {
  const SPREADSHEET_ID = '1dXDdhTG-3ppO5ybMPTocrkO2y4T222zC1iVCVZuLfIg';
  const BASE_URL = 'https://www.dltv.ac.th/channels/schedules/days/';
  const YEAR = 2020;
  const MONTHS = [5, 6];
  // Header row: column 0 holds the date, the rest are programme start times.
  // NOTE(review): '23:00' is absent, so a programme listed at 23:00 is
  // dropped - confirm this is intentional.
  const TIME_SLOTS = ['', '00:00', '00:30', '01:00', '01:30', '02:00', '02:30', '03:00', '03:30', '04:00', '04:30', '05:00', '05:30', '06:00',
    '06:30', '07:00', '07:30', '08:00', '08:01', '08:30', '09:00', '09:30', '10:00', '10:30', '11:00', '11:30', '12:00',
    '12:30', '13:00', '13:30', '14:00', '14:30', '15:00', '15:30', '16:00', '16:30', '17:00', '17:30', '18:00', '18:01', '18:30',
    '19:00', '19:30', '20:00', '20:30', '21:00', '21:30', '22:00', '22:30', '23:30'
  ];
  // The patterns run against HTML that has had all whitespace and '/'
  // removed, which is why '</div>' shows up as '<div>'.
  const LEVEL_OPEN_TAG = '<divclass="hidden-xssm-small">';
  const TIME_OPEN_TAG = '<divclass="col-md-2">';
  const CONTENT_OPEN_TAG = '<divclass="col-md-8"style="background-color:#f9f9f9">';
  const levelPattern = /<divclass="hidden-xssm-small\b\S+?\bdiv>/gm;
  const timePattern = /<divclass="col-md-2">\b\S+?\bdiv>/gm;
  const contentPattern = /<divclass="col-md-8"style="background-color:#f9f9f9">\S+?\bdiv>/gm;

  const ss = SpreadsheetApp.openById(SPREADSHEET_ID);
  removeEmptySheets_(ss);

  const indexResponse = UrlFetchApp.fetch(BASE_URL + '1', {
    'muteHttpExceptions': true,
    'followRedirects': false,
  });
  const levels = extractDivTexts_(compactHtml_(indexResponse.getContentText()), levelPattern, LEVEL_OPEN_TAG);
  if (levels.length === 0) {
    throw new Error('scraper: no education levels found on ' + BASE_URL + '1');
  }

  // Time label -> column index, built once instead of scanning per cell.
  const columnOf = new Map();
  TIME_SLOTS.forEach((slot, column) => {
    if (!columnOf.has(slot)) columnOf.set(slot, column);
  });

  const pad2 = (n) => (n < 10 ? '0' : '') + n;

  levels.forEach((level, levelIndex) => {
    try {
      const sheet = ss.insertSheet(level).activate();
      sheet.appendRow(TIME_SLOTS);
      MONTHS.forEach((month) => {
        // Day 0 of the following month is the last day of this month, so
        // impossible dates such as 2020-06-31 are never requested.
        const daysInMonth = new Date(YEAR, month, 0).getDate();
        for (let day = 1; day <= daysInMonth; day++) {
          const isoDate = YEAR + '-' + pad2(month) + '-' + pad2(day);
          const dayUrl = BASE_URL + (levelIndex + 1) + '/' + isoDate;
          const html = compactHtml_(fetchWithRetry_(dayUrl).getContentText());

          // Drop the last three characters of each time cell, as the
          // original did (presumably a ':ss' suffix - verify against the site).
          const times = extractDivTexts_(html, timePattern, TIME_OPEN_TAG)
            .map((text) => text.substring(0, text.length - 3));
          if (times.length === 0) continue;

          const programmes = extractDivTexts_(html, contentPattern, CONTENT_OPEN_TAG);
          const row = Array(TIME_SLOTS.length).fill('');
          times.forEach((time, k) => {
            const column = columnOf.get(time);
            // Ignore times without a header column and times that have no
            // matching programme cell.
            if (column === undefined || k >= programmes.length) return;
            row[column] = programmes[k];
          });
          row[0] = day + '/' + month + '/' + YEAR;
          sheet.appendRow(row);
        }
      });
      // A1 is filled last on purpose: a sheet left with an empty A1 counts
      // as incomplete and is removed by the cleanup at the start of a run.
      sheet.getRange(1, 1).setValue('วัน/เวลา');
    } catch (err) {
      console.error('scraper: skipped level "' + level + '": ' + err);
    }
  });
}

/**
 * Deletes every sheet whose A1 cell is the empty string, but never the last
 * remaining sheet (Apps Script refuses to delete a spreadsheet's only sheet).
 *
 * @param {Object} ss Spreadsheet returned by SpreadsheetApp.openById.
 */
function removeEmptySheets_(ss) {
  const sheets = ss.getSheets();
  let remaining = sheets.length;
  sheets.forEach((sheet) => {
    if (remaining > 1 && sheet.getRange(1, 1).getValue() === '') {
      ss.deleteSheet(sheet);
      remaining -= 1;
    }
  });
}

/**
 * Removes all whitespace and every '/' from an HTML string so that the
 * scraper's tag patterns can match with \S.
 *
 * @param {string} html Raw response text.
 * @return {string} The compacted text.
 */
function compactHtml_(html) {
  return html.replace(/[\s\/]/g, '');
}

/**
 * Returns the text of every element matched by `pattern`, with the first
 * occurrence of the opening tag and of '<div>' stripped from each match.
 *
 * @param {string} html Compacted HTML (see compactHtml_).
 * @param {RegExp} pattern Global pattern matching one whole element.
 * @param {string} openTag Literal opening tag to strip.
 * @return {string[]} Extracted texts; empty when nothing matches.
 */
function extractDivTexts_(html, pattern, openTag) {
  return (html.match(pattern) || []).map((div) => div.replace(openTag, '').replace('<div>', ''));
}

/**
 * Fetches a URL, retrying exactly once if the first attempt throws.
 *
 * @param {string} url Address to fetch.
 * @return {Object} The response returned by UrlFetchApp.fetch.
 * @throws Whatever the second attempt throws.
 */
function fetchWithRetry_(url) {
  try {
    return UrlFetchApp.fetch(url);
  } catch (firstError) {
    return UrlFetchApp.fetch(url);
  }
}
/**
 * Deletes the per-level schedule sheets from the target spreadsheet.
 *
 * Level names that have no sheet are skipped. A sheet that cannot be deleted
 * is logged and the remaining levels are still processed.
 */
function deletelevel() {
  const ss = SpreadsheetApp.openById('1dXDdhTG-3ppO5ybMPTocrkO2y4T222zC1iVCVZuLfIg');
  const levelNames = ['ประถมศึกษาปีที่1', 'ประถมศึกษาปีที่2', 'ประถมศึกษาปีที่3', 'ประถมศึกษาปีที่4', 'ประถมศึกษาปีที่5', 'ประถมศึกษาปีที่6', 'มัธยมศึกษาปีที่1', 'มัธยมศึกษาปีที่2', 'มัธยมศึกษาปีที่3', 'อนุบาลศึกษาปีที่1', 'อนุบาลศึกษาปีที่2', 'อนุบาลศึกษาปีที่3', 'มัธยมศึกษาปีที่4', 'มัธยมศึกษาปีที่5', 'มัธยมศึกษาปีที่6'];
  levelNames.forEach((name) => {
    try {
      const sheet = ss.getSheetByName(name);
      // A missing sheet is the expected case after a partial run; skip it
      // explicitly instead of relying on deleteSheet(null) to throw.
      if (!sheet) return;
      ss.deleteSheet(sheet);
    } catch (err) {
      console.error('deletelevel: could not delete sheet "' + name + '": ' + err);
    }
  });
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment