diff --git a/package-lock.json b/package-lock.json index f05adae..1f7d0d8 100644 --- a/package-lock.json +++ b/package-lock.json @@ -15,6 +15,7 @@ "log4js": "^6.9.1", "nntp-js": "^1.0.4", "node-unrar-js": "^2.0.0", + "simple-yenc": "^1.0.4", "sqlite": "^5.1.1", "sqlite3": "^6.0.1", "xmlbuilder2": "^3.1.1", @@ -1019,6 +1020,16 @@ "simple-concat": "^1.0.0" } }, + "node_modules/simple-yenc": { + "version": "1.0.4", + "resolved": "https://registry.npmjs.org/simple-yenc/-/simple-yenc-1.0.4.tgz", + "integrity": "sha512-5gvxpSd79e9a3V4QDYUqnqxeD4HGlhCakVpb6gMnDD7lexJggSBJRBO5h52y/iJrdXRilX9UCuDaIJhSWm5OWw==", + "license": "MIT", + "funding": { + "type": "individual", + "url": "https://github.com/sponsors/eshaz" + } + }, "node_modules/sprintf-js": { "version": "1.0.3", "resolved": "https://registry.npmjs.org/sprintf-js/-/sprintf-js-1.0.3.tgz", diff --git a/package.json b/package.json index c5624c1..1d92c16 100644 --- a/package.json +++ b/package.json @@ -18,9 +18,10 @@ "log4js": "^6.9.1", "nntp-js": "^1.0.4", "node-unrar-js": "^2.0.0", + "simple-yenc": "^1.0.4", "sqlite": "^5.1.1", "sqlite3": "^6.0.1", "xmlbuilder2": "^3.1.1", "yencode": "^1.0.1" } -} \ No newline at end of file +} diff --git a/src/body.worker.js b/src/body.worker.js index e220774..f620dae 100644 --- a/src/body.worker.js +++ b/src/body.worker.js @@ -2,6 +2,7 @@ import { Queue, Worker } from 'bullmq'; import log4js from './logger.js'; import { headerQueue } from './header.worker.js'; import { acquire, release } from './nntp.pool.js'; +import { decodeYenc } from './yenc.util.js'; const bodyLogger = log4js.getLogger('body'); @@ -23,10 +24,8 @@ export const startBodyWorker = () => { try { conn = await acquire(); const bodyBuffer = (await conn.body(header['message-id'])).data; - - const firstNewlineIndex = bodyBuffer.indexOf('\\n'); - const firstLineBuffer = (firstNewlineIndex !== -1) ? bodyBuffer.slice(0, firstNewlineIndex) : bodyBuffer; - const firstLine = firstLineBuffer.toString(); + const decodedBuffer = decodeYenc(bodyBuffer); + const firstLine = decodedBuffer.toString().split('\\n')[0]; const match = firstLine.match(YENC_REGEX); diff --git a/src/collection.worker.js b/src/collection.worker.js index 89d934a..360474b 100644 --- a/src/collection.worker.js +++ b/src/collection.worker.js @@ -3,7 +3,7 @@ import log4js from './logger.js'; import { getDb } from './database.js'; import { acquire, release } from './nntp.pool.js'; import { createExtractorFromData } from 'node-unrar-js'; -import * as yEnc from 'simple-yenc'; +import { decodeYenc } from './yenc.util.js'; const logger = log4js.getLogger('collection'); @@ -45,9 +45,8 @@ export const startCollectionWorker = () => { conn = await acquire(); await conn.group('alt.binaries.test'); const bodyBuffer = (await conn.body(`<${firstPart.id}>`)).data; - const decodedUint8Array = yEnc.decode(bodyBuffer.toString('latin1')); - const buffer = Buffer.from(decodedUint8Array); - const extractor = await createExtractorFromData({ data: buffer }); + const decodedBuffer = decodeYenc(bodyBuffer); + const extractor = await createExtractorFromData({ data: decodedBuffer }); const fileList = extractor.getFileList(); logger.info(`Files in "${file.filename}":`, fileList); } catch (error) { diff --git a/src/download.js b/src/download.js index 80def39..062f454 100644 --- a/src/download.js +++ b/src/download.js @@ -1,7 +1,7 @@ import 'dotenv/config'; import { getDb } from './database.js'; import { acquire, release, shutdown } from './nntp.pool.js'; -import * as yEnc from 'simple-yenc'; +import { decodeYenc } from './yenc.util.js'; import fs from 'fs/promises'; import log4js from './logger.js'; @@ -35,9 +35,8 @@ async function downloadFile(fileId) { try { logger.debug(`Downloading part ${partNumber}/${file.parts} with message ID: ${segment.id}`); const bodyBuffer = (await conn.body(`<${segment.id}>`)).data; - const decodedUint8Array = yEnc.decode(bodyBuffer.toString('latin1')); - const buffer = Buffer.from(decodedUint8Array); - parts.push(buffer); + const decodedPart = decodeYenc(bodyBuffer); + parts.push(decodedPart); } catch (error) { if (error.code === 430) { logger.error(`Article not found for part ${partNumber} (Message ID: ${segment.id})`); diff --git a/src/yenc.test.js b/src/yenc.test.js index 4cedcbc..dc9ff58 100644 --- a/src/yenc.test.js +++ b/src/yenc.test.js @@ -1,13 +1,12 @@ -import yencode from 'yencode'; +import { decodeYenc } from './yenc.util.js'; import fs from 'fs/promises'; import { Buffer } from 'buffer'; async function runTest() { - const encodedData = await fs.readFile('HjVfQlWmHdUrQeQkRiLkTwEj-1779830864932@nyuu.bin'); - const correctlyDecodedData = await fs.readFile('Dragon.Ball.S01E119.MULTI.BDRip.REMASTERED.1080p.x264.DTS-LILAS.par2'); + const encodedData = await fs.readFile('files/HjVfQlWmHdUrQeQkRiLkTwEj-1779830864932@nyuu.bin'); + const correctlyDecodedData = await fs.readFile('files/Dragon.Ball.S01E119.MULTI.BDRip.REMASTERED.1080p.x264.DTS-LILAS.par2-good'); - const decodedBuffer = yencode.decode(encodedData); - await fs.writeFile('decoded.bin', decodedBuffer) + const decodedBuffer = decodeYenc(encodedData); if (Buffer.compare(decodedBuffer, correctlyDecodedData) === 0) { console.log('Test passed: Decoded data matches the correctly decoded file.'); @@ -15,7 +14,7 @@ async function runTest() { console.error('Test failed: Decoded data does not match the correctly decoded file.'); console.error('Decoded buffer length:', decodedBuffer.length); console.error('Correct buffer length:', correctlyDecodedData.length); - await fs.writeFile('test-decoded-output.bin', decodedBuffer); + await fs.writeFile('files/test-decoded-output.bin', decodedBuffer); } } diff --git a/src/yenc.util.js b/src/yenc.util.js new file mode 100644 index 0000000..7639fd4 --- /dev/null +++ b/src/yenc.util.js @@ -0,0 +1,91 @@ +import yencode from 'yencode'; + +/** + * Parses yEnc metadata from a header or footer line. + * @param {string} line The line to parse. + * @returns {object} A key-value map of the metadata. + */ +function parseMetaLine(line) { + const meta = {}; + const parts = line.split(' '); + for (const part of parts) { + const eqIndex = part.indexOf('='); + if (eqIndex !== -1) { + meta[part.slice(0, eqIndex)] = part.slice(eqIndex + 1); + } + } + return meta; +} + +/** + * Extracts metadata from a yEnc-encoded buffer. + * @param {Buffer} encodedBuffer The yEnc-encoded buffer. + * @returns {object} An object containing all metadata from the header and footer. + */ +export function parseYencMeta(encodedBuffer) { + const headerBegin = Buffer.from('=ybegin'); + const headerPart = Buffer.from('=ypart'); + const footer = Buffer.from('\n=yend'); + + const headerStartIndex = encodedBuffer.indexOf(headerBegin); + const partStartIndex = encodedBuffer.indexOf(headerPart); + const footerStartIndex = encodedBuffer.lastIndexOf(footer); + + if ((headerStartIndex === -1 && partStartIndex === -1) || footerStartIndex === -1) { + throw new Error('Invalid yEnc data: missing header or footer.'); + } + + const headerIndex = headerStartIndex !== -1 ? headerStartIndex : partStartIndex; + const headerEndIndex = encodedBuffer.indexOf(Buffer.from('\\n'), headerIndex); + const headerLine = encodedBuffer.subarray(headerIndex, headerEndIndex).toString(); + + const footerEndIndex = encodedBuffer.indexOf(Buffer.from('\\n'), footerStartIndex); + const footerLine = encodedBuffer.subarray(footerStartIndex, footerEndIndex).toString(); + + const meta = { + ...parseMetaLine(headerLine), + ...parseMetaLine(footerLine), + }; + + // Convert numeric values + for (const key in meta) { + if (!isNaN(meta[key])) { + meta[key] = parseInt(meta[key], 10); + } + } + + return meta; +} + +/** + * Decodes a yEnc-encoded buffer and optionally writes it to a target buffer. + * @param {Buffer} encodedBuffer The yEnc-encoded buffer. + * @param {Buffer} [targetBuffer] An optional buffer to write the decoded data into. + * @returns {Buffer} The decoded data (or the target buffer if provided). + */ +export function decodeYenc(encodedBuffer, targetBuffer) { + const meta = parseYencMeta(encodedBuffer); + + const header = meta.part ? Buffer.from(`=ypart`) : Buffer.from(`=ybegin`); + const footer = Buffer.from(`\n=yend`); + + const contentStartIndex = encodedBuffer.indexOf('\n', encodedBuffer.indexOf(header)) + 1; + const contentEndIndex = encodedBuffer.lastIndexOf(footer); + const dataToDecode = encodedBuffer.subarray(contentStartIndex, contentEndIndex); + + const decoded = yencode.decode(dataToDecode); + + if (decoded.length !== meta.size) { + throw new Error(`Decoded size (${decoded.length}) does not match expected size (${meta.size}).`); + } + + if (targetBuffer) { + if (meta.begin === undefined) { + throw new Error('Cannot write to target buffer: missing "begin" offset in yEnc metadata.'); + } + decoded.copy(targetBuffer, meta.begin - 1); + return targetBuffer; + } + + return decoded; +}