tab-yoinker/06-output-generator/01-output-generator.js
2022-08-01 19:03:29 -05:00

136 lines
4.1 KiB
JavaScript

const sqlite3 = require('sqlite3');
const sqlite = require('sqlite');
const fs = require('fs');
const path = require('path');
const fsExtra = require('fs-extra');
// Device names that Windows reserves, compared case-insensitively and
// regardless of any extension that follows them.
const RESERVED_WINDOWS_NAMES = new Set([
  'CON', 'PRN', 'AUX', 'NUL',
  'COM1', 'COM2', 'COM3', 'COM4', 'COM5', 'COM6', 'COM7', 'COM8', 'COM9',
  'LPT1', 'LPT2', 'LPT3', 'LPT4', 'LPT5', 'LPT6', 'LPT7', 'LPT8', 'LPT9',
]);

/**
 * Turns an arbitrary string into a file name that is safe to create on
 * Windows (and therefore on the common POSIX file systems as well).
 *
 * Rules applied, in order:
 *  1. Reserved characters (<>:"/\|?*) are removed.
 *  2. Control characters (0-31) and anything above U+00FF are replaced
 *     with '_', one '_' per UTF-16 code unit.
 *  3. If the part before the first '.' is a reserved device name in any
 *     letter case (CON, nul, Com1, ...), '_' is appended to that part, so
 *     'CON' becomes 'CON_' and 'nul.txt' becomes 'nul_.txt'.
 *
 * @param {string} name - Raw file or directory name (a single path segment).
 * @returns {string} The sanitized name; may be empty if `name` held only
 *   reserved characters.
 */
function sanitizeFileName(name) {
  const cleaned = name
    .replace(/[<>:"\/\\|?*]/g, '')
    .replace(/[^\u0020-\u00ff]/g, '_');

  // The device-name check runs after the stripping above so that input such
  // as 'CON?' cannot collapse into a bare reserved name.
  const dotIndex = cleaned.indexOf('.');
  const stem = dotIndex === -1 ? cleaned : cleaned.slice(0, dotIndex);
  if (!RESERVED_WINDOWS_NAMES.has(stem.toUpperCase())) {
    return cleaned;
  }
  return `${stem}_${cleaned.slice(stem.length)}`;
}
/**
 * Picks a file name inside `dir` that does not collide with an existing
 * entry. The requested name is sanitized first; if it is already taken,
 * '-2', '-3', ... is inserted before the extension until a free name is
 * found.
 *
 * @param {string} dir - Directory the file is going to be created in.
 * @param {string} name - Desired file name, including its extension.
 * @returns {string} A sanitized file name (not a full path) that does not
 *   exist in `dir` at the time of the check.
 */
function getAvailableFileName(dir, name) {
  const safeName = sanitizeFileName(name);
  const extension = path.extname(safeName);
  const stem = path.basename(safeName, extension);

  let candidate = stem + extension;
  // Numbering starts at 2: the un-suffixed name counts as the first copy.
  for (let suffix = 2; fs.existsSync(path.join(dir, candidate)); suffix += 1) {
    candidate = `${stem}-${suffix}${extension}`;
  }
  return candidate;
}
/**
 * Strips the [tab], [/tab], [ch] and [/ch] markup tags from a tab body,
 * leaving the text between them untouched.
 *
 * @param {string} tab_text - Raw tab text containing the bracketed tags.
 * @returns {string} The text with those four tags removed.
 */
function cleanTab(tab_text) {
  // Applied one after another, in this order, each over the whole string.
  const tagPatterns = [
    /\[tab\]/g,
    /\[\/tab\]/g,
    /\[ch]/g,
    /\[\/ch\]/g,
  ];

  let stripped = tab_text;
  for (const pattern of tagPatterns) {
    stripped = stripped.replace(pattern, '');
  }
  return stripped;
}
/**
 * Renders the text-file contents for one row of the `tabs` table: a header
 * (song, artist, rating, author, last edit and optional version
 * description), the tab body with its markup tags removed, and a footer
 * with the remaining metadata.
 *
 * @param {object} row - One row as selected by the main query below.
 * @returns {string} The complete file contents, ending with a newline.
 */
function formatTabFile(row) {
  // The template lines are flush-left on purpose: any indentation inside the
  // template literal would end up in the generated files.
  // NOTE(review): `row.date * 1000` presumes `date` is a Unix timestamp in
  // seconds - confirm against the scraper that fills the database.
  return `${row.song_name} [${row.song_id}]: ${row.tab_url}
By ${row.artist_name} [${row.artist_id}]: ${row.artist_url}
Rating: ${row.rating}, Votes: ${row.votes}
Tab By: ${row.username} [${row.user_id}] (${row.user_iq} iq)
Last Edit: ${new Date(row.date * 1000).toLocaleString()}${row.version_description ? '\n------------------------------------------------------------------------\n' + row.version_description : ''}
------------------------------------------------------------------------
${cleanTab(row.tab_text).trim()/* Remove [bbcode]tags[/bbcode] */}
------------------------------------------------------------------------
Tonality: ${row.tonality_name}
Difficulty: ${row.difficulty}
Tuning: ${row.tuning}
Type: ${row.type_name}
Verified: ${row.verified}
Version ${row.version}
`;
}

/**
 * Writes one tab to output/<type>/<artist>-<artist id>/<song>.txt, creating
 * the directories as needed and picking a non-colliding file name.
 *
 * @param {object} row - One row as selected by the main query below.
 * @returns {string} The path of the file that was written.
 */
function writeTabFile(row) {
  // Both directory names come from database content, so both are sanitized.
  const typeDir = path.join('output', sanitizeFileName(String(row.type_name ?? 'null')));
  const artistDir = path.join(typeDir, sanitizeFileName(`${row.artist_name}-${row.artist_id}`));

  // `recursive: true` creates missing parents and is a no-op when the
  // directory already exists, so no separate existence check is needed.
  fs.mkdirSync(artistDir, { recursive: true });

  const fileName = getAvailableFileName(artistDir, `${row.song_name}.txt`);
  const filePath = path.join(artistDir, fileName);
  fs.writeFileSync(filePath, formatTabFile(row));
  return filePath;
}

// Entry point: empties ./output/, then writes one text file per tab that has
// a non-null tab_text, logging progress every 100 tabs.
(async () => {
  // Clear out old output directory
  await fsExtra.emptyDir('./output/');

  const db = await sqlite.open({
    driver: sqlite3.Database,
    filename: './input/tabs-full.db',
  });

  try {
    console.log('connected to db');

    const { c: total } = await db.get(`
      SELECT COUNT(*) AS c FROM tabs WHERE tab_text IS NOT NULL
    `);
    console.log(`${total} total tabs`);

    let soFar = 0;
    await db.each(`
      SELECT
        scrape_id
        , id
        , song_id
        , song_name
        , artist_id
        , artist_name
        , version
        , version_description
        , votes
        , rating
        , date
        , tonality_name
        , verified
        , artist_url
        , tab_url
        , difficulty
        , tuning
        , type_name
        , user_id
        , user_iq
        , username
        , tab_text
      FROM tabs
      WHERE tab_text IS NOT NULL
      ORDER BY rating * votes + votes
    `, (err, row) => {
      if (err) throw err;
      soFar += 1;

      const filePath = writeTabFile(row);
      if (soFar % 100 === 0) {
        console.log(`Tab #${soFar}/${total} (${(100 * soFar / total).toFixed(2)}%): ${filePath}`);
      }
    });
  } finally {
    // Release the database handle whether or not the export succeeded.
    await db.close();
  }
})().catch((err) => {
  // Report the failure and signal it through the exit code instead of
  // leaving the promise rejection unhandled.
  console.error(err);
  process.exitCode = 1;
});