// Script to download audio clips from https://kg3.qq.com.
// It tries to infer the filename from the page title and falls back to using the song code.
// This worked on January 8th, 2020; good luck getting it to work in the future!
const https = require('https'); | |
const http = require('http'); | |
const fs = require('fs'); | |
// recommend only doing ~4-5 at a time max | |
let i_start = 0, i_end = 4; | |
let urls = [ | |
]; | |
// part after https://kg3.qq.com/node/play?s= | |
let codes = [ | |
]; | |
let data = []; | |
const playurl_regex = /playurl":"(http:\/\/...?.stream.kg.qq.com\/szkge-btfs\/[A-Za-z0-9?=&_.]+)"/g; | |
const title_regex = /<title>(.*)- 全民K歌,KTV交友社区<\/title>/g; | |
function download_file(src, dest, writeRaw) { | |
let file = fs.createWriteStream(dest); | |
if (writeRaw) { | |
console.log('Saving raw HTML instead; find the player url yourself'); | |
file.write(writeRaw); | |
file.end(); | |
return; | |
} | |
console.log(`Starting download of ${dest}`); | |
http.get(src, function(res) { | |
res.on('data', function(data) { | |
file.write(data); | |
}).on('end', function() { | |
file.end(); | |
console.log(`Finished download of ${dest}`); | |
}); | |
}); | |
} | |
urls.forEach((this_song, i) => { | |
if (i < i_start || i > i_end) return; | |
var this_song_id = codes[i]; | |
data.push(''); | |
https.get(this_song, { | |
headers: { | |
'User-Agent': 'curl/7.64.0' // surprisingly, cURL as a user agent works... | |
} | |
}, (res) => { | |
console.log(`${this_song_id} response code: ${res.statusCode}`); | |
res.on('data', (c) => { | |
data[i] += c; | |
}); | |
res.on('end', () => { | |
// data is in data now | |
var filename; | |
for (let j = 0; j < 5; j++) { | |
if (filename) break; | |
title_regex.lastIndex = 0; | |
filename = title_regex.exec(data[i]); | |
} | |
if (filename) {} | |
else { | |
console.log('for some reason ${this_song_id} failed to parse filename... oops!'); | |
filename = [0, this_song_id]; | |
} | |
console.log(`${this_song_id} is ${filename[1]}.m4a`); | |
var download_url; | |
for (let j = 0; j < 5; j++) { | |
if (download_url) break; | |
playurl_regex.lastIndex = 0; | |
download_url = playurl_regex.exec(data[i]); | |
} | |
if (filename && download_url) download_file(download_url[1], filename[1] + '.m4a'); | |
else { | |
console.log(`for some reason ${this_song_id} failed to parse download url... oops!`); | |
download_file(null, this_song_id + '.html', data[i]); | |
} | |
}); | |
}).on('error', (e) => { | |
console.error(this_song + ': ' + e); | |
}); | |
}); |