127 lines
3.4 KiB
JavaScript
127 lines
3.4 KiB
JavaScript
const sqlite3 = require('sqlite3');
|
|
const sqlite = require('sqlite');
|
|
|
|
const fs = require('fs');
|
|
const path = require('path');
|
|
|
|
const fsExtra = require('fs-extra');
|
|
|
|
// Device names Windows reserves regardless of letter case or trailing extension.
const WINDOWS_RESERVED_NAMES = new Set([
  'CON', 'PRN', 'AUX', 'NUL',
  'COM1', 'COM2', 'COM3', 'COM4', 'COM5', 'COM6', 'COM7', 'COM8', 'COM9',
  'LPT1', 'LPT2', 'LPT3', 'LPT4', 'LPT5', 'LPT6', 'LPT7', 'LPT8', 'LPT9',
]);

/**
 * Turns an arbitrary string into a single file/directory name that is legal on Windows
 * (written for Windows; the same rules are a superset of what POSIX file systems require).
 *
 * Rules applied, in order:
 *  1. Reserved characters `<>:"/\|?*` are removed.
 *  2. If the part before the first `.` is a reserved device name (CON, NUL, COM1, ...,
 *     compared case-insensitively), `_` is appended to that part, e.g. `CON` -> `CON_`
 *     and `nul.txt` -> `nul_.txt`.
 *  3. Every UTF-16 code unit outside the range 0x20-0xFF (control characters and anything
 *     beyond the extended 8-bit charset) is replaced with `_`.
 *
 * A much stricter alternative would be to whitelist `[A-Za-z0-9-]` and replace everything
 * else with `_`.
 *
 * @param {string} name - Raw name (without any directory component).
 * @returns {string} The sanitized name; may be empty if `name` held only reserved characters.
 */
function sanitizeFileName(name) {
  // Strip reserved characters first so that e.g. "CON?" cannot collapse into a bare "CON"
  // after the device-name check has already run.
  const stripped = name.replace(/[<>:"\/\\|?*]/g, '');

  // Windows treats "NUL.txt" (any extension) the same as "NUL", so only the part before
  // the first dot is compared against the reserved list.
  const dotIndex = stripped.indexOf('.');
  const stem = dotIndex === -1 ? stripped : stripped.slice(0, dotIndex);
  const rest = dotIndex === -1 ? '' : stripped.slice(dotIndex);
  const deviceSafe = WINDOWS_RESERVED_NAMES.has(stem.toUpperCase())
    ? `${stem}_${rest}`
    : stripped;

  // No `u` flag on purpose: the replacement works per UTF-16 code unit, so an astral
  // character (surrogate pair) becomes two underscores, exactly as a per-char map would.
  return deviceSafe.replace(/[^\x20-\xFF]/g, '_');
}
|
|
|
|
/**
 * Returns a sanitized file name derived from `name` that does not yet exist in `dir`.
 *
 * The first candidate is the sanitized name itself; on collision a counter is inserted
 * before the extension, starting at 2: `song.txt`, `song-2.txt`, `song-3.txt`, ...
 *
 * If sanitizing leaves nothing in front of the extension (e.g. a song called "?" plus
 * ".txt" sanitizes to ".txt"), the stem `untitled` is used so the result keeps its
 * extension and the counter still lands before it (`untitled.txt`, `untitled-2.txt`).
 * Note that this also applies to a dotfile-style input such as ".gitignore".
 *
 * The check uses the synchronous file system API and does not reserve the name, so the
 * caller is expected to create the file before asking for the next name.
 *
 * @param {string} dir - Directory in which the file will be created.
 * @param {string} name - Desired file name, including extension.
 * @returns {string} A file name (no directory part) that is currently unused in `dir`.
 */
function getAvailableFileName(dir, name) {
  const safeName = sanitizeFileName(name);
  let ext = path.extname(safeName);
  let stem = path.basename(safeName, ext);

  // path.extname() reports '' for names whose only dot is the leading one (".txt"),
  // which would otherwise produce ".txt", ".txt-2", ... with the counter after the
  // extension. Treat such a name as "empty stem + extension" instead.
  if (ext === '' && /^\.[^.]+$/.test(stem)) {
    ext = stem;
    stem = '';
  }
  if (stem === '') {
    stem = 'untitled';
  }

  let candidate = `${stem}${ext}`;
  // Numbering starts at 2 because the un-numbered name counts as the first copy.
  for (let attempt = 2; fs.existsSync(path.join(dir, candidate)); attempt += 1) {
    candidate = `${stem}-${attempt}${ext}`;
  }
  return candidate;
}
|
|
|
|
/**
 * Renders the text stored on disk for one tab: a metadata header followed by a blank
 * line and the tab body, terminated by a newline.
 *
 * @param {object} row - One row of the `tabs` table as selected by the export query.
 * @returns {string} The complete file contents.
 */
function formatTabFile(row) {
  return [
    `${row.song_name} [${row.song_id}]: ${row.tab_url}`,
    `By ${row.artist_name} [${row.artist_id}]: ${row.artist_url}`,
    `Rating: ${row.rating}, Votes: ${row.votes}`,
    `Date: ${row.date}`,
    `Tonality: ${row.tonality_name}`,
    `Difficulty: ${row.difficulty}`,
    `Tuning: ${row.tuning}`,
    `Type: ${row.type_name}`,
    `Tab By: ${row.username} [${row.user_id}] (${row.user_iq} iq)`,
    `Verified: ${row.verified}`,
    `Version ${row.version}`,
    `${row.version_description || ''}`,
    '',
    `${row.tab_text}`,
    '',
  ].join('\n');
}

/**
 * Writes one tab to `output/<type>/<artist>-<artist id>/<song>.txt`, creating the
 * directories on demand and picking a non-colliding file name.
 *
 * @param {object} row - One row of the `tabs` table as selected by the export query.
 * @returns {string} Path of the file that was written.
 */
function writeTabFile(row) {
  // The type name comes straight from the database, so it is sanitized like every other
  // path segment. A segment made only of dots ('', '.', '..') would collapse or escape
  // the output tree, so it falls back to the same 'null' bucket used for missing types.
  const cleanedType = sanitizeFileName(String(row.type_name ?? 'null'));
  const typeSegment = /^\.*$/.test(cleanedType) ? 'null' : cleanedType;
  const artistSegment = sanitizeFileName(`${row.artist_name}-${row.artist_id}`);

  const targetDir = path.join('output', typeSegment, artistSegment);
  // `recursive: true` creates both levels and is a no-op when they already exist.
  fs.mkdirSync(targetDir, { recursive: true });

  const fileName = getAvailableFileName(targetDir, `${row.song_name}.txt`);
  const filePath = path.join(targetDir, fileName);
  fs.writeFileSync(filePath, formatTabFile(row));
  return filePath;
}

// Entry point: empties ./output/, then exports every tab that has text from
// ./input/tabs-full.db into one text file per tab, logging progress every 100 tabs.
(async () => {
  // Clear out old output directory
  await fsExtra.emptyDir('./output/');

  const db = await sqlite.open({
    driver: sqlite3.Database,
    filename: './input/tabs-full.db',
  });

  console.log('connected to db');

  try {
    const { c: total } = await db.get(`
      SELECT COUNT(*) AS c FROM tabs WHERE tab_text IS NOT NULL
    `);

    console.log(`${total} total tabs`);

    let soFar = 0;
    // Throwing inside the row callback would surface as an uncaught exception from the
    // driver instead of rejecting the awaited promise. The first failure is remembered,
    // remaining rows are skipped, and the error is rethrown once iteration has finished.
    let failure = null;

    await db.each(`
      SELECT
          scrape_id
        , id
        , song_id
        , song_name
        , artist_id
        , artist_name
        , version
        , version_description
        , votes
        , rating
        , date
        , tonality_name
        , verified
        , artist_url
        , tab_url
        , difficulty
        , tuning
        , type_name
        , user_id
        , user_iq
        , username
        , tab_text
      FROM tabs
      WHERE tab_text IS NOT NULL
    `, (err, row) => {
      if (failure !== null) {
        return;
      }

      try {
        if (err) {
          throw err;
        }

        soFar += 1;
        const filePath = writeTabFile(row);

        if (soFar % 100 === 0) {
          console.log(`Tab #${soFar}/${total} (${(100 * soFar / total).toFixed(2)}%): ${filePath}`);
        }
      } catch (rowError) {
        failure = rowError;
      }
    });

    if (failure !== null) {
      throw failure;
    }
  } finally {
    // Release the database handle whether the export succeeded or failed.
    await db.close();
  }
})().catch((err) => {
  // Report the failure and signal it through the exit code instead of leaving an
  // unhandled promise rejection.
  console.error(err);
  process.exitCode = 1;
});
|