const sqlite3 = require('sqlite3');
const sqlite = require('sqlite');
const fs = require('fs');
const path = require('path');
const fsExtra = require('fs-extra');

/**
 * Device names that Windows refuses as file names. Compared upper-cased,
 * against the part of the name before the first dot.
 */
const RESERVED_FILE_NAMES = new Set([
  'CON', 'PRN', 'AUX', 'NUL',
  'COM1', 'COM2', 'COM3', 'COM4', 'COM5', 'COM6', 'COM7', 'COM8', 'COM9',
  'LPT1', 'LPT2', 'LPT3', 'LPT4', 'LPT5', 'LPT6', 'LPT7', 'LPT8', 'LPT9',
]);

/** Query selecting every tab that has text to export. */
const TAB_QUERY = `
  SELECT
      scrape_id
    , id
    , song_id
    , song_name
    , artist_id
    , artist_name
    , version
    , version_description
    , votes
    , rating
    , date
    , tonality_name
    , verified
    , artist_url
    , tab_url
    , difficulty
    , tuning
    , type_name
    , user_id
    , user_iq
    , username
    , tab_text
  FROM tabs
  WHERE tab_text IS NOT NULL
`;

/**
 * Turn an arbitrary string into a name usable as a single file or directory
 * name. Written with Windows rules in mind:
 *
 *  - reserved characters `<>:"/\|?*` are removed;
 *  - any UTF-16 code unit outside 1-255 is replaced with `_`;
 *  - if the part before the first dot is a reserved device name (in any
 *    letter case), `_` is appended to that part, e.g. `con.txt` -> `con_.txt`.
 *
 * A stricter alternative would be to whitelist `[A-Za-z0-9-]` only.
 *
 * @param {string} name - Proposed file name (no directory part expected).
 * @returns {string} The sanitized name.
 */
function sanitizeFileName(name) {
  // Strip reserved characters first so that e.g. "CON?" is still recognised
  // as the reserved name "CON" below.
  const stripped = name.replace(/[<>:"\/\\|?*]/g, '');

  // No `u` flag on purpose: the replacement works per UTF-16 code unit.
  const cleaned = stripped.replace(/[^\u0001-\u00ff]/g, '_');

  // The device-name check looks at the name before the first dot, because an
  // extension does not make a reserved name usable ("NUL.txt" is still NUL).
  const dotIndex = cleaned.indexOf('.');
  const stem = dotIndex === -1 ? cleaned : cleaned.slice(0, dotIndex);
  if (RESERVED_FILE_NAMES.has(stem.toUpperCase())) {
    return `${stem}_${cleaned.slice(stem.length)}`;
  }
  return cleaned;
}

/**
 * Find a file name that does not exist yet inside `dir`.
 *
 * The name is sanitized first. If it is already taken, `-2`, `-3`, ... is
 * inserted before the extension until a free name is found.
 *
 * @param {string} dir - Directory the file will be created in.
 * @param {string} name - Desired file name, including extension.
 * @returns {string} A sanitized file name not currently present in `dir`.
 */
function getAvailableFileName(dir, name) {
  const safeName = sanitizeFileName(name);
  const ext = path.extname(safeName);
  const baseName = path.basename(safeName, ext);

  let candidate = baseName + ext;
  let tries = 1;
  while (fs.existsSync(path.join(dir, candidate))) {
    tries += 1;
    candidate = `${baseName}-${tries}${ext}`;
  }
  return candidate;
}

/**
 * Build the text written to a tab's output file: a metadata header followed
 * by the tab text itself.
 *
 * NOTE(review): the line breaks here were reconstructed from a
 * whitespace-collapsed copy of this file (one field per line, no blank
 * lines) - confirm against the intended layout.
 *
 * @param {object} row - One row of the `tabs` table as returned by TAB_QUERY.
 * @returns {string} File contents, terminated by a newline.
 */
function formatTabFile(row) {
  const lines = [
    `${row.song_name} [${row.song_id}]: ${row.tab_url}`,
    `By ${row.artist_name} [${row.artist_id}]: ${row.artist_url}`,
    `Rating: ${row.rating}, Votes: ${row.votes}`,
    `Date: ${row.date}`,
    `Tonality: ${row.tonality_name}`,
    `Difficulty: ${row.difficulty}`,
    `Tuning: ${row.tuning}`,
    `Type: ${row.type_name}`,
    `Tab By: ${row.username} [${row.user_id}] (${row.user_iq} iq)`,
    `Verified: ${row.verified}`,
    `Version ${row.version}`,
    `${row.version_description || ''}`,
    `${row.tab_text}`,
  ];
  return `${lines.join('\n')}\n`;
}

/**
 * Write one tab to `output/<type>/<artist>-<artist id>/<song>.txt`, creating
 * the directories as needed and never overwriting an existing file.
 *
 * @param {object} row - One row of the `tabs` table as returned by TAB_QUERY.
 * @returns {string} Path of the file that was written.
 */
function writeTabFile(row) {
  // Database values become directory names, so both levels are sanitized.
  const typeDir = path.join('output', sanitizeFileName(String(row.type_name ?? 'null')));
  const artistDir = sanitizeFileName(`${row.artist_name}-${row.artist_id}`);
  const targetDir = path.join(typeDir, artistDir);

  // `recursive` creates missing parents and is a no-op when the directory
  // already exists.
  fs.mkdirSync(targetDir, { recursive: true });

  const fileName = getAvailableFileName(targetDir, `${row.song_name}.txt`);
  const filePath = path.join(targetDir, fileName);
  fs.writeFileSync(filePath, formatTabFile(row));
  return filePath;
}

/**
 * Export every tab that has text from `./input/tabs-full.db` into the
 * `./output/` directory tree, logging progress every 100 tabs.
 *
 * @returns {Promise<void>} Resolves when all rows were written; rejects with
 *   the first database or file-system error encountered.
 */
async function main() {
  // Clear out old output directory
  await fsExtra.emptyDir('./output/');

  const db = await sqlite.open({
    driver: sqlite3.Database,
    filename: './input/tabs-full.db',
  });

  try {
    console.log('connected to db');

    const { c: total } = await db.get(`
      SELECT COUNT(*) AS c
      FROM tabs
      WHERE tab_text IS NOT NULL
    `);
    console.log(`${total} total tabs`);

    let soFar = 0;
    let failed = false;
    let failure;

    await db.each(TAB_QUERY, (err, row) => {
      // An exception thrown from this callback would not reject the promise
      // returned by `db.each`, so the first error is recorded here and
      // rethrown once iteration has finished. Later rows are skipped.
      if (failed) {
        return;
      }
      try {
        if (err) {
          throw err;
        }
        soFar += 1;
        const filePath = writeTabFile(row);
        if (soFar % 100 === 0) {
          console.log(`Tab #${soFar}/${total} (${(100 * soFar / total).toFixed(2)}%): ${filePath}`);
        }
      } catch (error) {
        failed = true;
        failure = error;
      }
    });

    if (failed) {
      throw failure;
    }
  } finally {
    await db.close();
  }
}

main().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});