Compare commits

...

2 Commits

Author SHA1 Message Date
HugoPoi
eff3f8956c feat: spotify html scrape from url 2020-07-04 17:40:20 +02:00
HugoPoi
f3b80e76fa feat: add myfreemp3 download source 2020-07-04 17:30:25 +02:00
2 changed files with 38 additions and 23 deletions

View File

@ -10,12 +10,12 @@ Inspired by [DatMusic](https://github.com/alashow/datmusic-api), [MyFreeMp3](htt
*For now*
* Parse a Spotify Playlist page
* Download a title from the playlist
* Scrape Spotify playlist page
* Download all the titles from MyFreeMp3 aka VK Music
* Generate a .m3u8 playlist file
*TODO*
* Download the html spotify page
* Download all songs in a folder
* Add proper command line options
* Document how to get a VK access token (need php and other things this
@ -38,6 +38,5 @@ BEWARE WIP
1. `git clone THIS`
1. `npm install`
1. Fill the `.env` with a `ACCESS_TOKEN=` for VK [HowTo](https://github.com/vodka2/vk-audio-token)
1. `curl https://open.spotify.com/playlist/6LgeEhc97Azxq6sinJQt6w > test.html`
1. `node . 0` => Download the first title of the playlist
1. (Optional) Only for direct VK search, fill the `.env` with an `ACCESS_TOKEN=` for VK [HowTo](https://github.com/vodka2/vk-audio-token)
1. `node . https://open.spotify.com/playlist/6LgeEhc97Azxq6sinJQt6w`

View File

@ -11,8 +11,10 @@ const m3u = require('m3u');
require('dotenv').config();
async function getSpotifyPlaylist(){
const spotifyPlaylistPageContent = await fs.readFile('./test.html');
async function getSpotifyPlaylist(playListUrl){
const spotifyPlaylistPageContent = await fetch(playListUrl)
.then(res => res.text())
const $ = Cheerio.load(spotifyPlaylistPageContent);
let playlist;
@ -44,19 +46,33 @@ async function searchOnVkMusic(query){
if(responseBody.error){
throw _.assign(responseBody.error, new Error(responseBody.error.error_msg));
} else {
return responseBody;
return responseBody.response.items;
}
});
}
/**
 * Search MyFreeMp3 for tracks matching a free-text query.
 *
 * POSTs the query to the site's JSONP search endpoint (callback name
 * `callback`) and returns the `response` member of the JSONP payload.
 *
 * The response body is remote, untrusted text. It is therefore unwrapped and
 * parsed as JSON rather than executed: `vm.runInNewContext` is not a security
 * sandbox, so evaluating the body would let the server run arbitrary code in
 * this process.
 *
 * @param {string} query - Search text, e.g. "<artist names> <track title>".
 * @returns {Promise<*>} The `response` value carried by the JSONP payload.
 * @throws {Error} If the HTTP status is not 2xx or the body is not of the
 *   form `callback(<json>)`.
 * @throws {SyntaxError} If the wrapped payload is not valid JSON.
 */
async function searchOnMyFreeMp3(query){
  const url = new URL('https://myfreemp3cc.com/api/search.php?callback=callback');
  const res = await fetch(url, {
    method: 'POST',
    body: new URLSearchParams({
      q: query,
      page: 0,
    }),
  });
  if(!res.ok){
    throw new Error(`MyFreeMp3 search failed: HTTP ${res.status}`);
  }
  const jsonp = await res.text();
  // Strip the `callback( ... );` wrapper; whatever is inside must be plain JSON.
  const wrapped = /^\s*callback\s*\(([\s\S]*)\)\s*;?\s*$/.exec(jsonp);
  if(!wrapped){
    throw new Error('MyFreeMp3 search returned an unexpected (non-JSONP) body');
  }
  return JSON.parse(wrapped[1]).response;
}
/**
 * Score how well a search result matches a Spotify track.
 *
 * The score is the sum of three components: artist-name similarity, title
 * similarity and duration closeness. The two name components are computed as
 * `1 - leven(a, b) / max(a.length, b.length)` on lowercased strings
 * (`leven` is presumably a Levenshtein edit distance — confirm against the
 * imported module).
 *
 * @param {object} spotifyMetas - Track metadata; reads `artists` (each with a
 *   `name`), `name` and `duration_ms`.
 * @param {object} vkmusicMetas - Search result; reads `artist`, `title` and
 *   `duration` (compared against `duration_ms / 1000`, so presumably seconds).
 * @returns {number} matchArtistScore + matchTitleScore + matchDurationScore.
 */
function matchScore(spotifyMetas, vkmusicMetas){
// Artist names are joined with ', ' and lowercased before comparison.
const originalArtistNames = _.map(spotifyMetas.artists, 'name').join(', ').toLowerCase();
const originalTitle = spotifyMetas.name.toLowerCase();
// Milliseconds -> whole seconds.
const originalDuration = Math.round(spotifyMetas.duration_ms/1000);
// NOTE(review): this text is a diff rendering. The next three declarations
// look like the pre-change lines and the three after them their replacements
// (same names, with `_.get` defaults added) — only one set can exist in the
// real file, since the same `const` cannot be declared twice.
const matchArtistScore = 1 - (leven(originalArtistNames, vkmusicMetas.artist.toLowerCase()) / Math.max(originalArtistNames.length, vkmusicMetas.artist.length));
const matchTitleScore = 1 - (leven(originalTitle, vkmusicMetas.title.toLowerCase()) / Math.max(originalTitle.length, vkmusicMetas.title.length));
const matchDurationScore = 1 - (Math.abs(originalDuration - vkmusicMetas.duration) / originalDuration); // TODO this can return more than 1 or less than 0
// Variant using `_.get` with defaults ('' / 0), so a result lacking
// `artist`, `title` or `duration` does not throw on property access.
// NOTE(review): when both compared strings are empty the divisor
// Math.max(0, 0) is 0, and when originalDuration is 0 the duration divisor
// is 0 — the components are then not finite numbers.
const matchArtistScore = 1 - (leven(originalArtistNames, _.get(vkmusicMetas, 'artist', '').toLowerCase()) / Math.max(originalArtistNames.length, _.get(vkmusicMetas, 'artist', '').length));
const matchTitleScore = 1 - (leven(originalTitle, _.get(vkmusicMetas, 'title', '').toLowerCase()) / Math.max(originalTitle.length, _.get(vkmusicMetas, 'title', '').length));
const matchDurationScore = 1 - (Math.abs(originalDuration - _.get(vkmusicMetas, 'duration', 0)) / originalDuration); // TODO this can return more than 1 or less than 0
debug('matchArtistScore=%f matchTitleScore=%f matchDurationScore=%f', matchArtistScore, matchTitleScore, matchDurationScore);
return matchArtistScore + matchTitleScore + matchDurationScore;
}
@ -67,24 +83,24 @@ async function generateM3U8Playlist(filesWithMetas){
await fs.writeFile(`playlist.m3u8`, m3uWriter.toString());
}
async function main(){
const playlist = await getSpotifyPlaylist();
async function main(playlistUrl){
const playlist = await getSpotifyPlaylist(playlistUrl);
const vkPlaylist = await Promise.map(playlist.tracks.items, async ({track}) => {
const artistNames = _.map(track.artists, 'name').join(', ');
debug('%s - %s', track.name, artistNames);
const { response } = await searchOnVkMusic(`${artistNames} ${track.name}`);
debug('response=%O', response);
const bestMatch = _.chain(response.items).map((item) => {
const items = await searchOnMyFreeMp3(`${artistNames} ${track.name}`);
debug('items=%O', items);
const bestMatch = _.chain(items).map((item) => {
item.score = matchScore(track, item);
return item;
}).sortBy('score').last().value();
debug('bestMatch=%O', bestMatch);
}).filter(item => item.score && item.url).sortBy('score').last().value();
if(!bestMatch){
console.log(`You are on your own for ${track.name} - ${artistNames}`);
return;
}
bestMatch.path = `${bestMatch.artist} - ${bestMatch.title}.mp3`;
await fs.access(bestMatch.path).catch(async () => {
debug('bestMatch=%O', bestMatch);
await fs.access(bestMatch.path).catch(async () => { // TODO find a proper way to not re-download, lower/uppercase problems
await fetch(bestMatch.url).then(res => {
res.body.pipe(createWriteStream(bestMatch.path));
});
@ -96,11 +112,11 @@ async function main(){
await generateM3U8Playlist(_.compact(vkPlaylist));
}
// Script entry point: run main() with the first command-line argument and
// print any rejection to stderr.
// NOTE(review): this text is a diff rendering — the commented-out call and
// the live call below appear to be the old and new versions of the same line.
//main().catch(err => console.error(err));
main(process.argv[2]).catch(err => console.error(err));
/**
 * Manual smoke test: run a single search for `query` and print the result.
 *
 * @param {string} query - Search text passed straight to the search function.
 * @returns {Promise<void>}
 */
async function test(query){
// NOTE(review): the two declarations below look like the old (VK search,
// destructuring `response`) and new (MyFreeMp3 search) versions of one line;
// only one can exist in the real file.
const { response } = await searchOnVkMusic(query);
const response = await searchOnMyFreeMp3(query);
console.log(response);
}
// The smoke test call appears here both live and commented out (presumably
// old and new diff lines — confirm against the repository).
test(process.argv[2]);
//test(process.argv[2]);