From b314ea7c84b19cb3a3dee032342aa37e353946cc Mon Sep 17 00:00:00 2001 From: DaxServer Date: Sun, 28 Sep 2025 11:32:49 +0200 Subject: [PATCH] draft: commons upload --- backend/src/api/project/index.ts | 10 +- backend/src/services/commons-upload.ts | 361 ++++++++++++++++++ .../file-processor/file-buffer-extractor.ts | 31 ++ .../services/file-processor/file-reader.ts | 16 + .../file-processor/hash-calculator.ts | 5 + .../file-processor/image-extractor.ts | 21 + .../file-processor/metadata-builder.ts | 30 ++ .../services/file-processor/mime-detector.ts | 28 ++ .../multiple-files-processor.ts | 43 +++ .../src/services/file-processor/processor.ts | 26 ++ backend/src/services/file-processor/types.ts | 33 ++ .../src/services/file-processor/validation.ts | 53 +++ .../validator/extension-validator.ts | 23 ++ .../validator/file-size-validator.ts | 23 ++ .../validator/mime-type-validator.ts | 23 ++ backend/src/types/wikibase-upload.ts | 126 ++++++ .../constraint-validation.service.test.ts | 0 backend/tests/upload/file-processor.test.ts | 342 +++++++++++++++++ 18 files changed, 1190 insertions(+), 4 deletions(-) create mode 100644 backend/src/services/commons-upload.ts create mode 100644 backend/src/services/file-processor/file-buffer-extractor.ts create mode 100644 backend/src/services/file-processor/file-reader.ts create mode 100644 backend/src/services/file-processor/hash-calculator.ts create mode 100644 backend/src/services/file-processor/image-extractor.ts create mode 100644 backend/src/services/file-processor/metadata-builder.ts create mode 100644 backend/src/services/file-processor/mime-detector.ts create mode 100644 backend/src/services/file-processor/multiple-files-processor.ts create mode 100644 backend/src/services/file-processor/processor.ts create mode 100644 backend/src/services/file-processor/types.ts create mode 100644 backend/src/services/file-processor/validation.ts create mode 100644 backend/src/services/file-processor/validator/extension-validator.ts create 
mode 100644 backend/src/services/file-processor/validator/file-size-validator.ts create mode 100644 backend/src/services/file-processor/validator/mime-type-validator.ts create mode 100644 backend/src/types/wikibase-upload.ts rename backend/{src/services/__tests__ => tests}/constraint-validation.service.test.ts (100%) create mode 100644 backend/tests/upload/file-processor.test.ts diff --git a/backend/src/api/project/index.ts b/backend/src/api/project/index.ts index 7c681c4..8eacd0c 100644 --- a/backend/src/api/project/index.ts +++ b/backend/src/api/project/index.ts @@ -1,9 +1,9 @@ import { cleanupProject, generateProjectName } from '@backend/api/project/project.import-file' import { + GetProjectByIdResponse, PaginationQuery, ProjectParams, ProjectResponseSchema, - GetProjectByIdResponse, type Project, } from '@backend/api/project/schemas' import { databasePlugin } from '@backend/plugins/database' @@ -255,9 +255,11 @@ export const projectRoutes = new Elysia({ prefix: '/api/project' }) error: 'Project name must be between 1 and 255 characters long if provided', }), ), - hasHeaders: t.Optional(t.BooleanString({ - default: true, - })), + hasHeaders: t.Optional( + t.BooleanString({ + default: true, + }), + ), }), response: { 201: t.Object({ diff --git a/backend/src/services/commons-upload.ts b/backend/src/services/commons-upload.ts new file mode 100644 index 0000000..9518c4b --- /dev/null +++ b/backend/src/services/commons-upload.ts @@ -0,0 +1,361 @@ +// import type { CommonsAuthService } from '@backend/services/commons-auth' +import type { ProcessedFile } from '@backend/services/file-processor/types' +import type { CommonsUploadResponse, UploadValidationError } from '@backend/types/wikibase-upload' + +export interface UploadOptions { + filename?: string + description: string + categories?: string[] + license?: string + author?: string + source?: string + date?: string + overwrite?: boolean + ignoreWarnings?: boolean +} + +export interface UploadProgress { + uploadId: 
/**
 * Uploads an already-processed file through the `action=upload` API reached via
 * `authService.makeAuthenticatedRequest`: the bytes are stashed in fixed-size
 * chunks and the stashed file is then published together with generated wikitext.
 *
 * `uploadFile` never throws for upload failures; they are reported through
 * `UploadResult.errors` and, when a callback is given, a `failed` progress event.
 */
export class CommonsUploadService {
  private readonly chunkSize = 1024 * 1024 * 4 // 4MB chunks
  private readonly maxRetries = 3
  private readonly retryDelay = 1000 // 1 second, doubled after every failed attempt

  /**
   * @param authService Object exposing `makeAuthenticatedRequest(method, params, body?)`
   *   that resolves to a fetch-style response (`ok`, `statusText`, `json()`).
   *   NOTE(review): still `any` because the `CommonsAuthService` type import is
   *   commented out in this draft — tighten once that service exists.
   */
  constructor(private authService: any) {}

  /**
   * Builds the file description page: an optional description section, a license
   * section (falling back to `{{subst:unc}}` when no license is given) and one
   * category link per entry in `options.categories`.
   *
   * NOTE(review): `author`, `source` and `date` are accepted in `UploadOptions`
   * but are not written into the wikitext here.
   */
  private generateWikitext = (options: UploadOptions): string => {
    const parts: string[] = []

    // Description
    if (options.description) {
      parts.push(`== {{int:filedesc}} ==`)
      parts.push(options.description)
      parts.push('')
    }

    // Licensing
    parts.push(`== {{int:license-header}} ==`)
    if (options.license) {
      parts.push(`{{${options.license}}}`)
    } else {
      parts.push('{{subst:unc}}')
    }
    parts.push('')

    // Categories
    if (options.categories && options.categories.length > 0) {
      for (const category of options.categories) {
        parts.push(`[[Category:${category}]]`)
      }
    }

    return parts.join('\n')
  }

  /** Resolves after `ms` milliseconds. */
  private sleep = (ms: number): Promise<void> => {
    return new Promise((resolve) => setTimeout(resolve, ms))
  }

  /**
   * Runs `operation`, retrying up to `retries` additional times with exponential
   * back-off (`retryDelay * 2^attempt`).
   *
   * @throws The last error raised by `operation` once all attempts are used up.
   */
  private retryOperation = async <T>(
    operation: () => Promise<T>,
    retries = this.maxRetries,
  ): Promise<T> => {
    // Pre-seeded so that a negative `retries` (loop never runs) still throws an Error
    // instead of `undefined`.
    let lastError: unknown = new Error('Operation was not attempted')

    for (let attempt = 0; attempt <= retries; attempt++) {
      try {
        return await operation()
      } catch (error) {
        lastError = error

        if (attempt < retries) {
          await this.sleep(this.retryDelay * Math.pow(2, attempt))
        }
      }
    }

    throw lastError
  }

  /**
   * Opens a stash upload for `filename` and returns the session to send chunks against.
   *
   * @throws Error when the request fails or the response carries no session key.
   */
  private initializeChunkedUpload = async (
    filename: string,
    fileSize: number,
  ): Promise<ChunkedUploadSession> => {
    const params = {
      action: 'upload',
      stash: '1',
      filesize: fileSize.toString(),
      filename,
      format: 'json',
    }

    const response = await this.authService.makeAuthenticatedRequest('POST', params)

    if (!response.ok) {
      throw new Error(`Failed to initialize chunked upload: ${response.statusText}`)
    }

    const data = (await response.json()) as { upload?: { sessionkey?: string } }

    if (!data.upload?.sessionkey) {
      throw new Error('Failed to get session key for chunked upload')
    }

    return {
      sessionKey: data.upload.sessionkey,
      offset: 0,
      totalSize: fileSize,
      filename,
    }
  }

  /**
   * Sends one chunk at `session.offset`. The offset is advanced only after the API
   * answered `Continue` or `Success`, so a retried call resends the same range.
   *
   * @throws Error when the request fails or the API reports any other result.
   */
  private uploadChunk = async (
    session: ChunkedUploadSession,
    chunk: Buffer,
    isLastChunk: boolean,
  ): Promise<void> => {
    const formData = new FormData()
    formData.append('action', 'upload')
    formData.append('stash', '1')
    formData.append('sessionkey', session.sessionKey)
    formData.append('offset', session.offset.toString())
    formData.append('format', 'json')

    if (isLastChunk) {
      formData.append('filename', session.filename)
    }

    const blob = new Blob([chunk])
    formData.append('chunk', blob, 'chunk')

    const params = {
      action: 'upload',
      stash: '1',
      sessionkey: session.sessionKey,
      offset: session.offset.toString(),
      format: 'json',
    }

    const response = await this.authService.makeAuthenticatedRequest('POST', params, formData)

    if (!response.ok) {
      throw new Error(`Failed to upload chunk: ${response.statusText}`)
    }

    const data = (await response.json()) as { upload?: { result?: string } }

    if (data.upload?.result !== 'Continue' && data.upload?.result !== 'Success') {
      throw new Error(`Chunk upload failed: ${data.upload?.result || 'Unknown error'}`)
    }

    session.offset += chunk.length
  }

  /**
   * Publishes the stashed upload under `options.filename` (or the session filename)
   * with the generated wikitext. Warnings are ignored when either `overwrite` or
   * `ignoreWarnings` is set.
   *
   * @throws Error when the HTTP request itself fails.
   */
  private finalizeUpload = async (
    session: ChunkedUploadSession,
    options: UploadOptions,
  ): Promise<CommonsUploadResponse> => {
    const wikitext = this.generateWikitext(options)

    const params: Record<string, string> = {
      action: 'upload',
      sessionkey: session.sessionKey,
      filename: options.filename || session.filename,
      text: wikitext,
      format: 'json',
    }

    // `ignoreWarnings` is part of UploadOptions but was previously never sent.
    if (options.overwrite || options.ignoreWarnings) {
      params.ignorewarnings = '1'
    }

    const response = await this.authService.makeAuthenticatedRequest('POST', params)

    if (!response.ok) {
      throw new Error(`Failed to finalize upload: ${response.statusText}`)
    }

    return (await response.json()) as CommonsUploadResponse
  }

  /**
   * Uploads `processedFile` and reports the outcome.
   *
   * @param processedFile Result of the file processor; invalid files are rejected
   *   immediately with their validation errors and nothing is sent.
   * @param options Target filename, description and upload flags.
   * @param onProgress Optional callback receiving progress events for this upload.
   * @returns `success: true` with the published file data, or `success: false` with errors.
   */
  uploadFile = async (
    processedFile: ProcessedFile,
    options: UploadOptions,
    onProgress?: (progress: UploadProgress) => void,
  ): Promise<UploadResult> => {
    if (!processedFile.isValid) {
      return {
        success: false,
        errors: processedFile.errors,
      }
    }

    const uploadId = crypto.randomUUID()
    const filename = options.filename || processedFile.metadata.filename
    const totalBytes = processedFile.buffer.length

    try {
      return await this.uploadChunkedFile(processedFile, options, uploadId, onProgress)
    } catch (error) {
      const errorMessage = error instanceof Error ? error.message : 'Unknown error'

      onProgress?.({
        uploadId,
        filename,
        bytesUploaded: 0,
        totalBytes,
        percentage: 0,
        status: 'failed',
        error: errorMessage,
      })

      return {
        success: false,
        errors: [
          {
            field: 'upload',
            code: 'UPLOAD_FAILED',
            message: errorMessage,
            details: { error: errorMessage },
          },
        ],
      }
    }
  }

  /**
   * Drives the chunk loop and the finalize step, emitting `uploading`,
   * `processing` and `completed` progress events along the way.
   *
   * @throws Whatever the chunk/finalize requests throw after their retries are used up.
   */
  private uploadChunkedFile = async (
    processedFile: ProcessedFile,
    options: UploadOptions,
    uploadId: string,
    onProgress?: (progress: UploadProgress) => void,
  ): Promise<UploadResult> => {
    const filename = options.filename || processedFile.metadata.filename
    const fileBuffer = processedFile.buffer
    const totalBytes = fileBuffer.length

    // Initialize chunked upload
    const session = await this.initializeChunkedUpload(filename, totalBytes)

    let bytesUploaded = 0

    // Upload chunks
    while (bytesUploaded < totalBytes) {
      const remainingBytes = totalBytes - bytesUploaded
      const chunkSize = Math.min(this.chunkSize, remainingBytes)
      const chunk = fileBuffer.subarray(bytesUploaded, bytesUploaded + chunkSize)
      const isLastChunk = bytesUploaded + chunkSize >= totalBytes

      await this.retryOperation(async () => {
        await this.uploadChunk(session, chunk, isLastChunk)
      })

      bytesUploaded += chunkSize

      onProgress?.({
        uploadId,
        filename,
        bytesUploaded,
        totalBytes,
        percentage: Math.round((bytesUploaded / totalBytes) * 100),
        status: 'uploading',
      })
    }

    // Finalize upload
    onProgress?.({
      uploadId,
      filename,
      bytesUploaded: totalBytes,
      totalBytes,
      percentage: 100,
      status: 'processing',
    })

    const result = await this.retryOperation(async () => {
      return await this.finalizeUpload(session, options)
    })

    // The response is only cast, not validated, so `upload` can be absent
    // (e.g. an error payload); treat that as a rejection instead of crashing.
    const upload = result.upload

    if (upload?.result === 'Success') {
      onProgress?.({
        uploadId,
        filename,
        bytesUploaded: totalBytes,
        totalBytes,
        percentage: 100,
        status: 'completed',
      })

      return {
        success: true,
        filename: upload.filename,
        url: upload.imageinfo?.url,
        pageUrl: upload.imageinfo?.descriptionurl,
        warnings: upload.warnings,
      }
    }

    return {
      success: false,
      errors: [
        {
          field: 'upload',
          code: 'UPLOAD_REJECTED',
          message: `Upload rejected: ${upload?.result ?? 'no upload result in response'}`,
          details: { result: upload?.result, warnings: upload?.warnings },
        },
      ],
    }
  }

  /**
   * Asks the query API whether `File:<filename>` exists.
   *
   * Absent pages are recognised by the *presence* of a `missing` (or `invalid`) key:
   * the default JSON output marks them with an empty string, which a truthiness
   * check would misread as "exists". An explicit `false` is treated as not flagged.
   *
   * @returns `false` when the request fails, no page is returned, or the page is
   *   flagged missing/invalid; `true` otherwise.
   */
  checkFileExists = async (filename: string): Promise<boolean> => {
    const params = {
      action: 'query',
      titles: `File:${filename}`,
      format: 'json',
    }

    const response = await this.authService.makeAuthenticatedRequest('GET', params)

    if (!response.ok) {
      return false
    }

    const data = (await response.json()) as {
      query?: {
        pages?: Record<string, { missing?: unknown; invalid?: unknown }>
      }
    }

    const page = Object.values(data.query?.pages ?? {})[0]
    if (!page) {
      return false
    }

    const isFlagged = (key: 'missing' | 'invalid'): boolean => key in page && page[key] !== false

    return !isFlagged('missing') && !isFlagged('invalid')
  }
}
/**
 * Returns the SHA-256 digest of `buffer` as a hex string.
 *
 * A fresh hasher is created for every call. `digest()` finalizes a hasher, so a
 * single module-level instance shared by all calls would make every call after
 * the first depend on how the runtime treats an already-finalized hasher.
 */
export const calculateFileHash = (buffer: Buffer): string => {
  return new Bun.CryptoHasher('sha256').update(buffer).digest('hex')
}
imageMetadata } + } catch (error) { + console.warn('Sharp failed to process image:', error) + return {} + } +} diff --git a/backend/src/services/file-processor/metadata-builder.ts b/backend/src/services/file-processor/metadata-builder.ts new file mode 100644 index 0000000..fa316e2 --- /dev/null +++ b/backend/src/services/file-processor/metadata-builder.ts @@ -0,0 +1,30 @@ +import { calculateFileHash } from '@backend/services/file-processor/hash-calculator' +import { extractImageMetadata } from '@backend/services/file-processor/image-extractor' +import { detectMimeType } from '@backend/services/file-processor/mime-detector' +import type { FileInput, FileMetadata } from '@backend/services/file-processor/types' +import { extname } from 'node:path' + +export const buildFileMetadata = async ( + buffer: ArrayBuffer, + size: number, + filename: string, + uploadFile: FileInput, +): Promise => { + const extension = extname(filename) + const mimeType = + uploadFile.type === 'url' && uploadFile.mimeType + ? 
/**
 * Best-effort display name for an input whose processing failed.
 * Never throws: a `filepath` input without a path falls back to `'unknown'`.
 */
const fallbackFilename = (input: FileInput | undefined): string => {
  if (input?.type === 'filepath') {
    return input.path ? basename(input.path) : 'unknown'
  }

  return input?.filename || 'unknown'
}

/**
 * Processes every input with `processFile` and returns one result per input, in
 * input order.
 *
 * An input whose processing rejects does not fail the batch; it is represented by
 * a placeholder `ProcessedFile` (empty buffer, `isValid: false`) carrying a single
 * `PROCESSING_FAILED` error with the rejection reason.
 *
 * @param uploadFiles Inputs to process.
 * @param options Validation overrides forwarded unchanged to `processFile`.
 */
export const processMultipleFiles = async (
  uploadFiles: FileInput[],
  options: Partial<FileValidationOptions> = {},
): Promise<ProcessedFile[]> => {
  const results = await Promise.allSettled(uploadFiles.map((file) => processFile(file, options)))

  return results.map((result, index) => {
    if (result.status === 'fulfilled') {
      return result.value
    }

    return {
      metadata: {
        // Must not throw here: a missing `path` is one of the reasons processing
        // rejects in the first place, and throwing would reject the whole batch.
        filename: fallbackFilename(uploadFiles[index]),
        size: 0,
        mimeType: '',
        extension: '',
        hash: '',
      },
      buffer: Buffer.alloc(0),
      isValid: false,
      errors: [
        {
          field: 'file',
          code: 'PROCESSING_FAILED',
          message: `Failed to process file: ${result.reason}`,
          details: { error: result.reason },
        },
      ],
    }
  })
}
/**
 * Validation limits applied by `processFile` when the caller does not override them.
 *
 * `allowedMimeTypes` lists both `audio/mpeg` (the registered MP3 media type, which
 * is what content-based detection is expected to report) and the non-standard
 * `audio/mp3` that the extension fallback map produces, so MP3 files pass either way.
 *
 * NOTE(review): `requireDimensions` is declared here but `validateFile` does not
 * read it.
 */
export const defaultValidationOptions: FileValidationOptions = {
  maxFileSize: 100 * 1024 * 1024, // 100MB
  allowedMimeTypes: [
    'image/jpeg',
    'image/png',
    'image/gif',
    'image/webp',
    'image/svg+xml',
    'image/tiff',
    'video/mp4',
    'video/webm',
    'video/ogg',
    'audio/mpeg',
    'audio/mp3',
    'audio/wav',
    'audio/ogg',
    'application/pdf',
  ],
  allowedExtensions: [
    '.jpg',
    '.jpeg',
    '.png',
    '.gif',
    '.webp',
    '.svg',
    '.tiff',
    '.tif',
    '.mp4',
    '.webm',
    '.ogv',
    '.mp3',
    '.wav',
    '.ogg',
    '.pdf',
  ],
  requireDimensions: false,
}
/**
 * Reduces a media type to its comparable form: parameters after `;` are dropped,
 * surrounding whitespace is trimmed and the value is lower-cased.
 */
const normalizeMimeType = (value: string): string =>
  (value.split(';', 1)[0] ?? '').trim().toLowerCase()

/**
 * Checks `metadata.mimeType` against the allow-list.
 *
 * The comparison ignores case and media-type parameters on both sides, because the
 * MIME type may be supplied by the caller rather than detected (e.g.
 * `IMAGE/JPEG` or `image/jpeg; charset=binary`).
 *
 * @returns An empty array when allowed, otherwise a single `INVALID_MIME_TYPE`
 *   error that reports the MIME type exactly as it was received.
 */
export const validateMimeType = (
  metadata: FileMetadata,
  allowedMimeTypes: string[],
): UploadValidationError[] => {
  const actual = normalizeMimeType(metadata.mimeType)
  const isAllowed = allowedMimeTypes.some((allowed) => normalizeMimeType(allowed) === actual)

  if (isAllowed) {
    return []
  }

  return [
    {
      field: 'file',
      code: 'INVALID_MIME_TYPE',
      message: `MIME type ${metadata.mimeType} is not allowed`,
      details: {
        actualMimeType: metadata.mimeType,
        allowedMimeTypes,
      },
    },
  ]
}
/**
 * One upload job for a project, with running counters for its files.
 *
 * The counters use `t.Integer`: passing `type: 'integer'` as an option to
 * `t.Number` does not yield an integer schema, because the builder's own
 * `type: 'number'` takes precedence over caller options.
 */
export const UploadJob = t.Object({
  id: UUIDPattern,
  projectId: t.String(),
  status: UploadJobStatus,
  totalFiles: t.Integer({ minimum: 0 }),
  processedFiles: t.Integer({ minimum: 0 }),
  successfulUploads: t.Integer({ minimum: 0 }),
  failedUploads: t.Integer({ minimum: 0 }),
  createdAt: t.Date(),
  updatedAt: t.Date(),
  completedAt: t.Optional(t.Date()),
  errorMessage: t.Optional(t.String()),
})
export type UploadJob = typeof UploadJob.static

/**
 * One file inside an upload job. The source is described by `filePath` and/or
 * `fileUrl`; the `commons*` fields and `uploadedAt` are optional result data.
 */
export const UploadFile = t.Object({
  id: UUIDPattern,
  jobId: UUIDPattern,
  fileName: t.String(),
  filePath: t.Optional(t.String()),
  fileUrl: t.Optional(t.String()),
  fileSize: t.Optional(t.Integer({ minimum: 0 })),
  mimeType: t.Optional(t.String()),
  status: UploadFileStatus,
  commonsFileName: t.Optional(t.String()),
  commonsUrl: t.Optional(t.String()),
  wikitext: t.Optional(t.String()),
  errorMessage: t.Optional(t.String()),
  uploadedAt: t.Optional(t.Date()),
})
export type UploadFile = typeof UploadFile.static
// Every `.resolves` / `.rejects` expectation below is awaited; without the await the
// test function returns before the assertion settles and a failure can go unnoticed.
describe('file processor', () => {
  let tempDir: string
  let testFilePath: string

  /** Writes a solid-colour square JPEG into the temp dir and returns its path. */
  const writeJpeg = async (
    name: string,
    size: number,
    background: { r: number; g: number; b: number },
  ): Promise<string> => {
    const target = join(tempDir, name)
    const jpegBuffer = await sharp({
      create: { width: size, height: size, channels: 3, background },
    })
      .jpeg()
      .toBuffer()
    await Bun.write(target, jpegBuffer)
    return target
  }

  beforeEach(async () => {
    tempDir = join(tmpdir(), 'dataforge-test-' + Date.now())
    await mkdir(tempDir, { recursive: true })

    // A valid 1x1 pixel JPEG shared by most tests
    testFilePath = await writeJpeg('test-image.jpg', 1, { r: 255, g: 0, b: 0 })
  })

  afterEach(async () => {
    await rm(tempDir, { recursive: true, force: true })
  })

  describe('processFile', () => {
    test('should process a valid JPEG file', async () => {
      const fileInput: FileInput = {
        type: 'filepath',
        path: testFilePath,
      }

      await expect(processFile(fileInput)).resolves.toMatchObject({
        metadata: {
          filename: 'test-image.jpg',
          mimeType: 'image/jpeg',
          extension: '.jpg',
        },
        isValid: true,
        errors: [],
      })
    })

    test('should handle filepath with basename extraction', async () => {
      const fileInput: FileInput = {
        type: 'filepath',
        path: testFilePath,
        filename: 'custom-name.jpg', // This will be ignored for filepath type
      }

      await expect(processFile(fileInput)).resolves.toMatchObject({
        metadata: {
          filename: 'test-image.jpg',
          mimeType: 'image/jpeg',
          extension: '.jpg',
        },
        isValid: true,
        errors: [],
      })
    })

    test('should validate file size limits', async () => {
      const options: Partial<FileValidationOptions> = {
        maxFileSize: 10, // Very small limit
      }

      const fileInput: FileInput = {
        type: 'filepath',
        path: testFilePath,
      }

      await expect(processFile(fileInput, options)).resolves.toMatchObject({
        metadata: {
          filename: 'test-image.jpg',
          mimeType: 'image/jpeg',
          extension: '.jpg',
        },
        isValid: false,
      })
    })

    test('should validate allowed MIME types', async () => {
      const options: Partial<FileValidationOptions> = {
        allowedMimeTypes: ['image/png'], // Only PNG allowed
      }

      const fileInput: FileInput = {
        type: 'filepath',
        path: testFilePath,
      }

      await expect(processFile(fileInput, options)).resolves.toMatchObject({
        metadata: {
          filename: 'test-image.jpg',
          mimeType: 'image/jpeg',
          extension: '.jpg',
        },
        isValid: false,
      })
    })

    test('should validate allowed extensions', async () => {
      const options: Partial<FileValidationOptions> = {
        allowedExtensions: ['.png'], // Only PNG extension allowed
      }

      const fileInput: FileInput = {
        type: 'filepath',
        path: testFilePath,
      }

      await expect(processFile(fileInput, options)).resolves.toMatchObject({
        metadata: {
          filename: 'test-image.jpg',
          mimeType: 'image/jpeg',
          extension: '.jpg',
        },
        isValid: false,
      })
    })

    test('should handle URL download failure', async () => {
      // Local throw-away server instead of a public endpoint, so the test does not
      // depend on network access.
      const server = Bun.serve({
        port: 0,
        fetch: () => new Response('not found', { status: 404 }),
      })

      try {
        const fileInput: FileInput = {
          type: 'url',
          url: `http://localhost:${server.port}/missing.jpg`,
        }

        await expect(processFile(fileInput)).rejects.toThrow('Failed to download file from URL')
      } finally {
        server.stop(true)
      }
    })

    test('should handle non-existent file', async () => {
      const fileInput: FileInput = {
        type: 'filepath',
        path: '/non/existent/file.jpg',
      }

      // File not found should throw an error
      await expect(processFile(fileInput)).rejects.toThrow('ENOENT: no such file or directory')
    })
  })

  describe('processMultipleFiles', () => {
    test('should process multiple valid files', async () => {
      const secondFilePath = await writeJpeg('test-image-2.jpg', 2, { r: 0, g: 255, b: 0 })

      const fileInputs: FileInput[] = [
        { type: 'filepath', path: testFilePath },
        { type: 'filepath', path: secondFilePath },
      ]

      await expect(processMultipleFiles(fileInputs)).resolves.toEqual(
        expect.arrayContaining([
          expect.objectContaining({
            metadata: expect.objectContaining({
              filename: 'test-image.jpg',
              mimeType: 'image/jpeg',
              extension: '.jpg',
            }),
            isValid: true,
            errors: [],
          }),
          expect.objectContaining({
            metadata: expect.objectContaining({
              filename: 'test-image-2.jpg',
              mimeType: 'image/jpeg',
              extension: '.jpg',
            }),
            isValid: true,
            errors: [],
          }),
        ]),
      )
    })

    test('should handle mixed valid and invalid files', async () => {
      const fileInputs: FileInput[] = [
        { type: 'filepath', path: testFilePath },
        { type: 'filepath', path: '/non/existent/file.jpg' },
      ]

      await expect(processMultipleFiles(fileInputs)).resolves.toEqual(
        expect.arrayContaining([
          expect.objectContaining({
            metadata: expect.objectContaining({
              filename: 'test-image.jpg',
              mimeType: 'image/jpeg',
              extension: '.jpg',
            }),
            isValid: true,
            errors: [],
          }),
          expect.objectContaining({
            isValid: false,
            errors: expect.any(Array),
          }),
        ]),
      )
    })

    test('should apply validation options to all files', async () => {
      const secondFilePath = await writeJpeg('test-image-2.jpg', 3, { r: 0, g: 0, b: 255 })

      const fileInputs: FileInput[] = [
        { type: 'filepath', path: testFilePath },
        { type: 'filepath', path: secondFilePath },
      ]

      const options: Partial<FileValidationOptions> = {
        allowedMimeTypes: ['image/png'], // Only PNG allowed
      }

      await expect(processMultipleFiles(fileInputs, options)).resolves.toEqual(
        expect.arrayContaining([
          expect.objectContaining({
            metadata: expect.objectContaining({
              filename: 'test-image.jpg',
              mimeType: 'image/jpeg',
              extension: '.jpg',
            }),
            isValid: false,
            errors: expect.any(Array),
          }),
          expect.objectContaining({
            metadata: expect.objectContaining({
              filename: 'test-image-2.jpg',
              mimeType: 'image/jpeg',
              extension: '.jpg',
            }),
            isValid: false,
            errors: expect.any(Array),
          }),
        ]),
      )
    })
  })

  // Single suite: the draft declared this describe block twice with the same JPEG test.
  describe('image metadata extraction', () => {
    test('should extract image dimensions and metadata for JPEG', async () => {
      const fileInput: FileInput = {
        type: 'filepath',
        path: testFilePath,
      }

      await expect(processFile(fileInput)).resolves.toMatchObject({
        metadata: {
          filename: 'test-image.jpg',
          mimeType: 'image/jpeg',
          extension: '.jpg',
          imageMetadata: expect.objectContaining({
            width: expect.any(Number),
            height: expect.any(Number),
            format: expect.any(String),
          }),
        },
        isValid: true,
        errors: [],
      })
    })

    test('should handle non-image files gracefully', async () => {
      const textFilePath = join(tempDir, 'test.txt')
      await Bun.write(textFilePath, 'This is a text file')

      const fileInput: FileInput = {
        type: 'filepath',
        path: textFilePath,
      }

      const customOptions = {
        allowedMimeTypes: ['text/plain', 'application/octet-stream'],
        allowedExtensions: ['.txt'],
      }

      await expect(processFile(fileInput, customOptions)).resolves.toMatchObject({
        metadata: {
          filename: 'test.txt',
          mimeType: 'application/octet-stream',
          extension: '.txt',
        },
        isValid: true,
        errors: [],
      })
    })
  })
})